You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@commons.apache.org by tn...@apache.org on 2013/03/27 22:48:11 UTC

svn commit: r1461862 [2/2] - in /commons/proper/math/trunk/src: main/java/org/apache/commons/math3/ml/ main/java/org/apache/commons/math3/ml/clustering/ main/java/org/apache/commons/math3/ml/distance/ test/java/org/apache/commons/math3/ml/ test/java/or...

Added: commons/proper/math/trunk/src/test/java/org/apache/commons/math3/ml/clustering/KMeansPlusPlusClustererTest.java
URL: http://svn.apache.org/viewvc/commons/proper/math/trunk/src/test/java/org/apache/commons/math3/ml/clustering/KMeansPlusPlusClustererTest.java?rev=1461862&view=auto
==============================================================================
--- commons/proper/math/trunk/src/test/java/org/apache/commons/math3/ml/clustering/KMeansPlusPlusClustererTest.java (added)
+++ commons/proper/math/trunk/src/test/java/org/apache/commons/math3/ml/clustering/KMeansPlusPlusClustererTest.java Wed Mar 27 21:48:10 2013
@@ -0,0 +1,191 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.commons.math3.ml.clustering;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.List;
+
+import org.apache.commons.math3.exception.NumberIsTooSmallException;
+import org.apache.commons.math3.ml.distance.EuclideanDistance;
+import org.apache.commons.math3.random.JDKRandomGenerator;
+import org.apache.commons.math3.random.RandomGenerator;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+
+/**
+ * Unit tests for {@link KMeansPlusPlusClusterer}.
+ *
+ * <p>The shared random generator is re-created and seeded with a fixed value
+ * before each test (see {@link #setUp()}).</p>
+ */
+public class KMeansPlusPlusClustererTest {
+
+    /** Random generator passed to the clusterers that accept one. */
+    private RandomGenerator random;
+
+    /** Installs a freshly created generator with a fixed seed. */
+    @Before
+    public void setUp() {
+        random = new JDKRandomGenerator();
+        random.setSeed(1746432956321L);
+    }
+
+    /**
+     * JIRA: MATH-305
+     *
+     * Two points, one cluster, one iteration: the single resulting cluster
+     * must hold both input points.
+     */
+    @Test
+    public void testPerformClusterAnalysisDegenerate() {
+        KMeansPlusPlusClusterer<DoublePoint> transformer =
+                new KMeansPlusPlusClusterer<DoublePoint>(1, 1);
+
+        DoublePoint[] points = new DoublePoint[] {
+                new DoublePoint(new int[] { 1959, 325100 }),
+                new DoublePoint(new int[] { 1960, 373200 }), };
+        List<? extends Cluster<DoublePoint>> clusters = transformer.cluster(Arrays.asList(points));
+        Assert.assertEquals(1, clusters.size());
+        Assert.assertEquals(2, (clusters.get(0).getPoints().size()));
+        // Fresh instances with the same coordinates: the lookup cannot succeed by identity.
+        DoublePoint pt1 = new DoublePoint(new int[] { 1959, 325100 });
+        DoublePoint pt2 = new DoublePoint(new int[] { 1960, 373200 });
+        Assert.assertTrue(clusters.get(0).getPoints().contains(pt1));
+        Assert.assertTrue(clusters.get(0).getPoints().contains(pt2));
+    }
+
+    /**
+     * Clusters a fixed set of 27 four-dimensional points into 2 to 26 clusters
+     * with each of the listed empty-cluster strategies, and checks that the
+     * requested number of clusters is returned and that the cluster sizes add
+     * up to the number of input points.
+     */
+    @Test
+    public void testCertainSpace() {
+        KMeansPlusPlusClusterer.EmptyClusterStrategy[] strategies = {
+            KMeansPlusPlusClusterer.EmptyClusterStrategy.LARGEST_VARIANCE,
+            KMeansPlusPlusClusterer.EmptyClusterStrategy.LARGEST_POINTS_NUMBER,
+            KMeansPlusPlusClusterer.EmptyClusterStrategy.FARTHEST_POINT
+        };
+
+        final int numberOfVariables = 27;
+        // The points do not depend on the strategy, so they are built once.
+        final DoublePoint[] breakingPoints = createBreakingPoints(numberOfVariables);
+
+        for (KMeansPlusPlusClusterer.EmptyClusterStrategy strategy : strategies) {
+            for (int n = 2; n < numberOfVariables; ++n) {
+                KMeansPlusPlusClusterer<DoublePoint> transformer =
+                    new KMeansPlusPlusClusterer<DoublePoint>(n, 100, new EuclideanDistance(), random, strategy);
+
+                List<? extends Cluster<DoublePoint>> clusters =
+                        transformer.cluster(Arrays.asList(breakingPoints));
+
+                Assert.assertEquals(n, clusters.size());
+                int sum = 0;
+                for (Cluster<DoublePoint> cluster : clusters) {
+                    sum += cluster.getPoints().size();
+                }
+                Assert.assertEquals(numberOfVariables, sum);
+            }
+        }
+    }
+
+    /**
+     * Builds the space which will break the cluster algorithm, as used by
+     * {@link #testCertainSpace()}.
+     *
+     * <p>Point {@code i} has four coordinates; coordinate {@code j} equals
+     * {@code (1 + (i + j) * numberOfVariables) * 1000000}.</p>
+     *
+     * @param numberOfVariables number of points to create, also the step
+     * between consecutive coordinates (before scaling)
+     * @return the generated points
+     */
+    private static DoublePoint[] createBreakingPoints(final int numberOfVariables) {
+        // testvalues will be multiplied
+        final int multiplier = 1000000;
+        final int dimension = 4;
+
+        final DoublePoint[] breakingPoints = new DoublePoint[numberOfVariables];
+        for (int i = 0; i < numberOfVariables; i++) {
+            final int[] coordinates = new int[dimension];
+            for (int j = 0; j < dimension; j++) {
+                coordinates[j] = (1 + (i + j) * numberOfVariables) * multiplier;
+            }
+            breakingPoints[i] = new DoublePoint(coordinates);
+        }
+        return breakingPoints;
+    }
+
+    /**
+     * A helper distance measure for testSmallDistances(). It scales the
+     * Euclidean distance by 0.001, so it tends to return distances less than 1.
+     */
+    private static class CloseDistance extends EuclideanDistance {
+        private static final long serialVersionUID = 1L;
+
+        /**
+         * @param a first point
+         * @param b second point
+         * @return the Euclidean distance between {@code a} and {@code b},
+         * multiplied by 0.001
+         */
+        @Override
+        public double compute(double[] a, double[] b) {
+            return super.compute(a, b) * 0.001;
+        }
+    }
+
+    /**
+     * Test points that are very close together. See issue MATH-546.
+     */
+    @Test
+    public void testSmallDistances() {
+        // Create a bunch of points. Most are identical, but one is different by a
+        // small distance (as measured by CloseDistance).
+        int[] repeatedArray = { 0 };
+        int[] uniqueArray = { 1 };
+        DoublePoint repeatedPoint = new DoublePoint(repeatedArray);
+        DoublePoint uniquePoint = new DoublePoint(uniqueArray);
+
+        Collection<DoublePoint> points = new ArrayList<DoublePoint>();
+        final int NUM_REPEATED_POINTS = 10 * 1000;
+        for (int i = 0; i < NUM_REPEATED_POINTS; ++i) {
+            points.add(repeatedPoint);
+        }
+        points.add(uniquePoint);
+
+        // Ask a KMeansPlusPlusClusterer to run zero iterations (i.e., to simply choose initial
+        // cluster centers).
+        final long RANDOM_SEED = 0;
+        final int NUM_CLUSTERS = 2;
+        final int NUM_ITERATIONS = 0;
+        random.setSeed(RANDOM_SEED);
+
+        KMeansPlusPlusClusterer<DoublePoint> clusterer =
+            new KMeansPlusPlusClusterer<DoublePoint>(NUM_CLUSTERS, NUM_ITERATIONS,
+                    new CloseDistance(), random);
+        List<CentroidCluster<DoublePoint>> clusters = clusterer.cluster(points);
+
+        // Check that one of the chosen centers is the unique point.
+        boolean uniquePointIsCenter = false;
+        for (CentroidCluster<DoublePoint> cluster : clusters) {
+            if (cluster.getCenter().equals(uniquePoint)) {
+                uniquePointIsCenter = true;
+            }
+        }
+        Assert.assertTrue(uniquePointIsCenter);
+    }
+
+    /**
+     * 2 variables cannot be clustered into 3 clusters. See issue MATH-436.
+     */
+    @Test(expected=NumberIsTooSmallException.class)
+    public void testPerformClusterAnalysisToManyClusters() {
+        KMeansPlusPlusClusterer<DoublePoint> transformer =
+            new KMeansPlusPlusClusterer<DoublePoint>(3, 1, new EuclideanDistance(), random);
+
+        DoublePoint[] points = new DoublePoint[] {
+            new DoublePoint(new int[] { 1959, 325100 }),
+            new DoublePoint(new int[] { 1960, 373200 })
+        };
+
+        // Expected to throw: fewer points than requested clusters.
+        transformer.cluster(Arrays.asList(points));
+    }
+
+}

Propchange: commons/proper/math/trunk/src/test/java/org/apache/commons/math3/ml/clustering/KMeansPlusPlusClustererTest.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: commons/proper/math/trunk/src/test/java/org/apache/commons/math3/ml/clustering/KMeansPlusPlusClustererTest.java
------------------------------------------------------------------------------
    svn:keywords = Id Revision HeadURL

Propchange: commons/proper/math/trunk/src/test/java/org/apache/commons/math3/ml/clustering/KMeansPlusPlusClustererTest.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: commons/proper/math/trunk/src/test/java/org/apache/commons/math3/ml/clustering/MultiKMeansPlusPlusClustererTest.java
URL: http://svn.apache.org/viewvc/commons/proper/math/trunk/src/test/java/org/apache/commons/math3/ml/clustering/MultiKMeansPlusPlusClustererTest.java?rev=1461862&view=auto
==============================================================================
--- commons/proper/math/trunk/src/test/java/org/apache/commons/math3/ml/clustering/MultiKMeansPlusPlusClustererTest.java (added)
+++ commons/proper/math/trunk/src/test/java/org/apache/commons/math3/ml/clustering/MultiKMeansPlusPlusClustererTest.java Wed Mar 27 21:48:10 2013
@@ -0,0 +1,98 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.commons.math3.ml.clustering;
+
+
+import java.util.Arrays;
+import java.util.List;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+/**
+ * Unit test for {@link MultiKMeansPlusPlusClusterer}.
+ */
+public class MultiKMeansPlusPlusClustererTest {
+
+    /**
+     * Clusters 21 two-dimensional points, laid out as three groups of 8, 5 and
+     * 8 points, into 3 clusters. Each returned cluster is identified by the
+     * sign of its center coordinates, then checked for its size and for a
+     * center equal to the mean of the corresponding group.
+     */
+    @Test
+    public void dimension2() {
+        final KMeansPlusPlusClusterer<DoublePoint> singleRunClusterer =
+            new KMeansPlusPlusClusterer<DoublePoint>(3, 10);
+        final MultiKMeansPlusPlusClusterer<DoublePoint> transformer =
+            new MultiKMeansPlusPlusClusterer<DoublePoint>(singleRunClusterer, 5);
+
+        final int[][] coordinates = {
+            // first expected cluster
+            { -15,  3 }, { -15,  4 }, { -15,  5 }, { -14,  3 },
+            { -14,  5 }, { -13,  3 }, { -13,  4 }, { -13,  5 },
+
+            // second expected cluster
+            {  -1,  0 }, {  -1, -1 }, {   0, -1 }, {   1, -1 }, {   1, -2 },
+
+            // third expected cluster
+            {  13,  3 }, {  13,  4 }, {  14,  4 }, {  14,  7 },
+            {  16,  5 }, {  16,  6 }, {  17,  4 }, {  17,  7 }
+        };
+        final DoublePoint[] points = new DoublePoint[coordinates.length];
+        for (int i = 0; i < coordinates.length; i++) {
+            points[i] = new DoublePoint(coordinates[i]);
+        }
+
+        final List<CentroidCluster<DoublePoint>> clusters =
+            transformer.cluster(Arrays.asList(points));
+        Assert.assertEquals(3, clusters.size());
+
+        final double tolerance = 1e-6;
+        boolean sawFirst = false;
+        boolean sawSecond = false;
+        boolean sawThird = false;
+        for (CentroidCluster<DoublePoint> cluster : clusters) {
+            final double[] center = cluster.getCenter().getPoint();
+            if (center[0] < 0) {
+                // negative abscissa: first group
+                sawFirst = true;
+                Assert.assertEquals(8, cluster.getPoints().size());
+                Assert.assertEquals(-14, center[0], tolerance);
+                Assert.assertEquals(4, center[1], tolerance);
+            } else if (center[1] < 0) {
+                // non-negative abscissa, negative ordinate: second group
+                sawSecond = true;
+                Assert.assertEquals(5, cluster.getPoints().size());
+                Assert.assertEquals(0, center[0], tolerance);
+                Assert.assertEquals(-1, center[1], tolerance);
+            } else {
+                // everything else: third group
+                sawThird = true;
+                Assert.assertEquals(8, cluster.getPoints().size());
+                Assert.assertEquals(15, center[0], tolerance);
+                Assert.assertEquals(5, center[1], tolerance);
+            }
+        }
+        Assert.assertTrue(sawFirst);
+        Assert.assertTrue(sawSecond);
+        Assert.assertTrue(sawThird);
+    }
+
+}

Propchange: commons/proper/math/trunk/src/test/java/org/apache/commons/math3/ml/clustering/MultiKMeansPlusPlusClustererTest.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: commons/proper/math/trunk/src/test/java/org/apache/commons/math3/ml/clustering/MultiKMeansPlusPlusClustererTest.java
------------------------------------------------------------------------------
    svn:keywords = Id Revision HeadURL

Propchange: commons/proper/math/trunk/src/test/java/org/apache/commons/math3/ml/clustering/MultiKMeansPlusPlusClustererTest.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain