You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@commons.apache.org by tn...@apache.org on 2013/03/27 22:48:11 UTC
svn commit: r1461862 [2/2] - in /commons/proper/math/trunk/src:
main/java/org/apache/commons/math3/ml/
main/java/org/apache/commons/math3/ml/clustering/
main/java/org/apache/commons/math3/ml/distance/
test/java/org/apache/commons/math3/ml/ test/java/or...
Added: commons/proper/math/trunk/src/test/java/org/apache/commons/math3/ml/clustering/KMeansPlusPlusClustererTest.java
URL: http://svn.apache.org/viewvc/commons/proper/math/trunk/src/test/java/org/apache/commons/math3/ml/clustering/KMeansPlusPlusClustererTest.java?rev=1461862&view=auto
==============================================================================
--- commons/proper/math/trunk/src/test/java/org/apache/commons/math3/ml/clustering/KMeansPlusPlusClustererTest.java (added)
+++ commons/proper/math/trunk/src/test/java/org/apache/commons/math3/ml/clustering/KMeansPlusPlusClustererTest.java Wed Mar 27 21:48:10 2013
@@ -0,0 +1,191 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.commons.math3.ml.clustering;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.List;
+
+import org.apache.commons.math3.exception.NumberIsTooSmallException;
+import org.apache.commons.math3.ml.distance.EuclideanDistance;
+import org.apache.commons.math3.random.JDKRandomGenerator;
+import org.apache.commons.math3.random.RandomGenerator;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+
+public class KMeansPlusPlusClustererTest {
+
+ private RandomGenerator random;
+
+    @Before
+    public void setUp() {
+        random = new JDKRandomGenerator();
+        random.setSeed(1746432956321L); // fixed seed: the tests sharing this generator are repeatable
+    }
+
+    /**
+     * JIRA: MATH-305
+     *
+     * Degenerate case: two points are clustered into a single cluster using
+     * a single iteration; that cluster must contain both points.
+     */
+    @Test
+    public void testPerformClusterAnalysisDegenerate() {
+        final List<DoublePoint> input = Arrays.asList(
+            new DoublePoint(new int[] { 1959, 325100 }),
+            new DoublePoint(new int[] { 1960, 373200 }));
+
+        final KMeansPlusPlusClusterer<DoublePoint> transformer =
+            new KMeansPlusPlusClusterer<DoublePoint>(1, 1);
+        final List<? extends Cluster<DoublePoint>> clusters = transformer.cluster(input);
+        Assert.assertEquals(1, clusters.size());
+        final Cluster<DoublePoint> onlyCluster = clusters.get(0);
+        Assert.assertEquals(2, onlyCluster.getPoints().size());
+        // membership is checked with freshly constructed points, not the input instances
+        Assert.assertTrue(onlyCluster.getPoints().contains(new DoublePoint(new int[] { 1959, 325100 })));
+        Assert.assertTrue(onlyCluster.getPoints().contains(new DoublePoint(new int[] { 1960, 373200 })));
+    }
+
+    /**
+     * Clusters 27 four-dimensional points, whose coordinates are multiples of
+     * 1,000,000, into n clusters for every n from 2 to 26 and for every
+     * {@link KMeansPlusPlusClusterer.EmptyClusterStrategy} listed below.
+     * Each run must yield exactly n clusters holding 27 points in total.
+     */
+    @Test
+    public void testCertainSpace() {
+        KMeansPlusPlusClusterer.EmptyClusterStrategy[] strategies = {
+            KMeansPlusPlusClusterer.EmptyClusterStrategy.LARGEST_VARIANCE,
+            KMeansPlusPlusClusterer.EmptyClusterStrategy.LARGEST_POINTS_NUMBER,
+            KMeansPlusPlusClusterer.EmptyClusterStrategy.FARTHEST_POINT
+        };
+        // number of points; also the step between consecutive coordinates
+        final int numberOfVariables = 27;
+        // number of coordinates per point
+        final int dimension = 4;
+        // testvalues will be multiplied
+        final int multiplier = 1000000;
+
+        for (KMeansPlusPlusClusterer.EmptyClusterStrategy strategy : strategies) {
+            // define the space which will break the cluster algorithm:
+            // coordinate d of point i is (1 + 27 * (i + d)) * multiplier
+            final DoublePoint[] breakingPoints = new DoublePoint[numberOfVariables];
+            for (int i = 0; i < numberOfVariables; i++) {
+                final int[] coordinates = new int[dimension];
+                for (int d = 0; d < dimension; d++) {
+                    coordinates[d] = (1 + numberOfVariables * (i + d)) * multiplier;
+                }
+                breakingPoints[i] = new DoublePoint(coordinates);
+            }
+
+            for (int n = 2; n < numberOfVariables; ++n) {
+                KMeansPlusPlusClusterer<DoublePoint> transformer =
+                    new KMeansPlusPlusClusterer<DoublePoint>(n, 100, new EuclideanDistance(), random, strategy);
+
+                List<? extends Cluster<DoublePoint>> clusters =
+                    transformer.cluster(Arrays.asList(breakingPoints));
+
+                // exactly the requested number of clusters must come back
+                Assert.assertEquals(n, clusters.size());
+                // and together they must hold as many points as were supplied
+                int sum = 0;
+                for (Cluster<DoublePoint> cluster : clusters) {
+                    sum += cluster.getPoints().size();
+                }
+                Assert.assertEquals(numberOfVariables, sum);
+            }
+        }
+    }
+
+    /**
+     * A helper distance measure for testSmallDistances(). It behaves like
+     * {@link EuclideanDistance} but scales every result by 0.001, so distances tend to be less than 1.
+     */
+    private static class CloseDistance extends EuclideanDistance {
+        private static final long serialVersionUID = 1L;
+
+        @Override
+        public double compute(double[] a, double[] b) {
+            return super.compute(a, b) * 0.001;
+        }
+    }
+
+    /**
+     * Test points that are very close together. See issue MATH-546.
+     * Only the initial centers are chosen (zero iterations), and the single
+     * distinct point is expected to be one of them.
+     */
+    @Test
+    public void testSmallDistances() {
+        // Create a bunch of points measured with CloseDistance. Most are identical,
+        // but one is different by a small distance.
+        final DoublePoint repeatedPoint = new DoublePoint(new int[] { 0 });
+        final DoublePoint uniquePoint = new DoublePoint(new int[] { 1 });
+
+        final int repeatedCount = 10 * 1000;
+        final Collection<DoublePoint> points = new ArrayList<DoublePoint>();
+        for (int remaining = repeatedCount; remaining > 0; --remaining) {
+            points.add(repeatedPoint);
+        }
+        points.add(uniquePoint);
+
+        // Reseed the shared generator so that the choice of initial centers
+        // does not depend on the seed installed by setUp().
+        final long seed = 0;
+        random.setSeed(seed);
+
+        // Ask a KMeansPlusPlusClusterer to run zero iterations (i.e., to simply choose
+        // initial cluster centers).
+        final int clusterCount = 2;
+        final int iterationCount = 0;
+        final KMeansPlusPlusClusterer<DoublePoint> clusterer =
+            new KMeansPlusPlusClusterer<DoublePoint>(clusterCount, iterationCount,
+                                                     new CloseDistance(), random);
+        final List<CentroidCluster<DoublePoint>> clusters = clusterer.cluster(points);
+
+        // Check that one of the chosen centers is the unique point.
+        boolean uniquePointIsCenter = false;
+        for (CentroidCluster<DoublePoint> cluster : clusters) {
+            uniquePointIsCenter |= cluster.getCenter().equals(uniquePoint);
+        }
+        Assert.assertTrue(uniquePointIsCenter);
+    }
+
+    /**
+     * Asking for 3 clusters when only 2 points are available must be rejected
+     * with a NumberIsTooSmallException. See issue MATH-436.
+     */
+    @Test(expected=NumberIsTooSmallException.class)
+    public void testPerformClusterAnalysisToManyClusters() {
+        final List<DoublePoint> twoPoints = Arrays.asList(
+            new DoublePoint(new int[] { 1959, 325100 }),
+            new DoublePoint(new int[] { 1960, 373200 }));
+
+        final int requestedClusters = 3;
+        final int iterationCount = 1;
+        final KMeansPlusPlusClusterer<DoublePoint> transformer =
+            new KMeansPlusPlusClusterer<DoublePoint>(requestedClusters, iterationCount,
+                                                     new EuclideanDistance(), random);
+
+        // the expected NumberIsTooSmallException should be raised by this call
+        transformer.cluster(twoPoints);
+    }
+
+}
Propchange: commons/proper/math/trunk/src/test/java/org/apache/commons/math3/ml/clustering/KMeansPlusPlusClustererTest.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: commons/proper/math/trunk/src/test/java/org/apache/commons/math3/ml/clustering/KMeansPlusPlusClustererTest.java
------------------------------------------------------------------------------
svn:keywords = Id Revision HeadURL
Propchange: commons/proper/math/trunk/src/test/java/org/apache/commons/math3/ml/clustering/KMeansPlusPlusClustererTest.java
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: commons/proper/math/trunk/src/test/java/org/apache/commons/math3/ml/clustering/MultiKMeansPlusPlusClustererTest.java
URL: http://svn.apache.org/viewvc/commons/proper/math/trunk/src/test/java/org/apache/commons/math3/ml/clustering/MultiKMeansPlusPlusClustererTest.java?rev=1461862&view=auto
==============================================================================
--- commons/proper/math/trunk/src/test/java/org/apache/commons/math3/ml/clustering/MultiKMeansPlusPlusClustererTest.java (added)
+++ commons/proper/math/trunk/src/test/java/org/apache/commons/math3/ml/clustering/MultiKMeansPlusPlusClustererTest.java Wed Mar 27 21:48:10 2013
@@ -0,0 +1,98 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.commons.math3.ml.clustering;
+
+
+import java.util.Arrays;
+import java.util.List;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+public class MultiKMeansPlusPlusClustererTest {
+
+    /**
+     * Clusters 21 two-dimensional points laid out in three groups and checks
+     * the size and the center of each of the three clusters that come back.
+     */
+    @Test
+    public void dimension2() {
+        // integer coordinates of the points to cluster, one point per row
+        final int[][] coordinates = {
+            // first expected cluster
+            { -15, 3 },
+            { -15, 4 },
+            { -15, 5 },
+            { -14, 3 },
+            { -14, 5 },
+            { -13, 3 },
+            { -13, 4 },
+            { -13, 5 },
+
+            // second expected cluster
+            { -1, 0 },
+            { -1, -1 },
+            { 0, -1 },
+            { 1, -1 },
+            { 1, -2 },
+
+            // third expected cluster
+            { 13, 3 },
+            { 13, 4 },
+            { 14, 4 },
+            { 14, 7 },
+            { 16, 5 },
+            { 16, 6 },
+            { 17, 4 },
+            { 17, 7 }
+        };
+        final DoublePoint[] points = new DoublePoint[coordinates.length];
+        for (int i = 0; i < points.length; i++) {
+            points[i] = new DoublePoint(coordinates[i]);
+        }
+
+        // wraps a clusterer built with (3, 10); 5 is presumably the number of
+        // independent runs it performs - confirm against MultiKMeansPlusPlusClusterer
+        final MultiKMeansPlusPlusClusterer<DoublePoint> transformer =
+            new MultiKMeansPlusPlusClusterer<DoublePoint>(
+                    new KMeansPlusPlusClusterer<DoublePoint>(3, 10), 5);
+        final List<CentroidCluster<DoublePoint>> clusters = transformer.cluster(Arrays.asList(points));
+
+        Assert.assertEquals(3, clusters.size());
+        // expected size and center of each cluster, in the order the groups are listed above
+        final int[] expectedSizes = { 8, 5, 8 };
+        final double[][] expectedCenters = { { -14, 4 }, { 0, -1 }, { 15, 5 } };
+        final boolean[] found = new boolean[expectedSizes.length];
+        final double epsilon = 1e-6;
+        for (CentroidCluster<DoublePoint> cluster : clusters) {
+            final double[] center = cluster.getCenter().getPoint();
+            // negative x selects the first group, else negative y the second, else the third
+            final int group = center[0] < 0 ? 0 : (center[1] < 0 ? 1 : 2);
+            found[group] = true;
+            Assert.assertEquals(expectedSizes[group], cluster.getPoints().size());
+            Assert.assertEquals(expectedCenters[group][0], center[0], epsilon);
+            Assert.assertEquals(expectedCenters[group][1], center[1], epsilon);
+        }
+
+        // each of the three groups must have been matched by some cluster
+        for (boolean groupFound : found) {
+            Assert.assertTrue(groupFound);
+        }
+    }
+
+}
Propchange: commons/proper/math/trunk/src/test/java/org/apache/commons/math3/ml/clustering/MultiKMeansPlusPlusClustererTest.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: commons/proper/math/trunk/src/test/java/org/apache/commons/math3/ml/clustering/MultiKMeansPlusPlusClustererTest.java
------------------------------------------------------------------------------
svn:keywords = Id Revision HeadURL
Propchange: commons/proper/math/trunk/src/test/java/org/apache/commons/math3/ml/clustering/MultiKMeansPlusPlusClustererTest.java
------------------------------------------------------------------------------
svn:mime-type = text/plain