You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@commons.apache.org by tn...@apache.org on 2013/04/14 18:09:20 UTC
svn commit: r1467797 - in /commons/proper/math/trunk/src: changes/changes.xml
main/java/org/apache/commons/math3/ml/clustering/FuzzyKMeansClusterer.java
test/java/org/apache/commons/math3/ml/clustering/FuzzyKMeansClustererTest.java
Author: tn
Date: Sun Apr 14 16:09:18 2013
New Revision: 1467797
URL: http://svn.apache.org/r1467797
Log:
[MATH-898] Finish FuzzyKMeans clustering algorithm.
Modified:
commons/proper/math/trunk/src/changes/changes.xml
commons/proper/math/trunk/src/main/java/org/apache/commons/math3/ml/clustering/FuzzyKMeansClusterer.java
commons/proper/math/trunk/src/test/java/org/apache/commons/math3/ml/clustering/FuzzyKMeansClustererTest.java
Modified: commons/proper/math/trunk/src/changes/changes.xml
URL: http://svn.apache.org/viewvc/commons/proper/math/trunk/src/changes/changes.xml?rev=1467797&r1=1467796&r2=1467797&view=diff
==============================================================================
--- commons/proper/math/trunk/src/changes/changes.xml (original)
+++ commons/proper/math/trunk/src/changes/changes.xml Sun Apr 14 16:09:18 2013
@@ -51,6 +51,9 @@ If the output is not quite correct, chec
</properties>
<body>
<release version="x.y" date="TBD" description="TBD">
+ <action dev="tn" type="add" issue="MATH-898">
+ Added "FuzzyKMeansClusterer" to "o.a.c.m.ml.clustering" package.
+ </action>
<action dev="luc" type="fix" issue="MATH-965" >
Fixed inconsistent dimensions preventing use of secondary states
in ODE events.
Modified: commons/proper/math/trunk/src/main/java/org/apache/commons/math3/ml/clustering/FuzzyKMeansClusterer.java
URL: http://svn.apache.org/viewvc/commons/proper/math/trunk/src/main/java/org/apache/commons/math3/ml/clustering/FuzzyKMeansClusterer.java?rev=1467797&r1=1467796&r2=1467797&view=diff
==============================================================================
--- commons/proper/math/trunk/src/main/java/org/apache/commons/math3/ml/clustering/FuzzyKMeansClusterer.java (original)
+++ commons/proper/math/trunk/src/main/java/org/apache/commons/math3/ml/clustering/FuzzyKMeansClusterer.java Sun Apr 14 16:09:18 2013
@@ -22,6 +22,7 @@ import java.util.Collections;
import java.util.List;
import org.apache.commons.math3.exception.MathIllegalArgumentException;
+import org.apache.commons.math3.exception.MathIllegalStateException;
import org.apache.commons.math3.exception.NumberIsTooSmallException;
import org.apache.commons.math3.linear.MatrixUtils;
import org.apache.commons.math3.linear.RealMatrix;
@@ -34,15 +35,32 @@ import org.apache.commons.math3.util.Mat
import org.apache.commons.math3.util.MathUtils;
/**
- * Fuzzy K-Means algorithm.
+ * Fuzzy K-Means clustering algorithm.
* <p>
- * TODO
+ * The Fuzzy K-Means algorithm is a variation of the classical K-Means algorithm, with the
+ * major difference that a single data point is not uniquely assigned to a single cluster.
+ * Instead, each point i has a set of weights u<sub>ij</sub> which indicate the degree of membership
+ * to the cluster j.
+ * <p>
+ * The algorithm then tries to minimize the objective function:
+ * <pre>
+ * J = ∑<sub>i=1..C</sub>∑<sub>k=1..N</sub> u<sub>ik</sub><sup>m</sup>d<sub>ik</sub><sup>2</sup>
+ * </pre>
+ * with d<sub>ik</sub> being the distance between data point i and the cluster center k.
* <p>
* The algorithm requires two parameters:
* <ul>
- * <li>k: the number of clusters
- * <li>fuzzyness: ...
+ * <li>k: the number of clusters
+ * <li>fuzziness: determines the level of cluster fuzziness, larger values lead to fuzzier clusters
+ * </ul>
+ * Additional, optional parameters:
+ * <ul>
+ * <li>maxIterations: the maximum number of iterations
+ * <li>epsilon: the convergence criteria, default is 1e-3
* </ul>
+ * <p>
+ * The fuzzy variant of the K-Means algorithm is more robust with regard to the selection
+ * of the initial cluster centers.
*
* @param <T> type of the points to cluster
* @version $Id$
@@ -59,8 +77,8 @@ public class FuzzyKMeansClusterer<T exte
/** The maximum number of iterations. */
private final int maxIterations;
- /** The fuzzyness factor. */
- private final double fuzzyness;
+ /** The fuzziness factor. */
+ private final double fuzziness;
/** The convergence criteria. */
private final double epsilon;
@@ -83,53 +101,53 @@ public class FuzzyKMeansClusterer<T exte
* The euclidean distance will be used as default distance measure.
*
* @param k the number of clusters to split the data into
- * @param fuzzyness the fuzzyness factor, must be > 1.0
- * @throws NumberIsTooSmallException if {@code fuzzyness <= 1.0}
+ * @param fuzziness the fuzziness factor, must be > 1.0
+ * @throws NumberIsTooSmallException if {@code fuzziness <= 1.0}
*/
- public FuzzyKMeansClusterer(final int k, final double fuzzyness) throws NumberIsTooSmallException {
- this(k, fuzzyness, -1, new EuclideanDistance());
+ public FuzzyKMeansClusterer(final int k, final double fuzziness) throws NumberIsTooSmallException {
+ this(k, fuzziness, -1, new EuclideanDistance());
}
/**
* Creates a new instance of a FuzzyKMeansClusterer.
*
* @param k the number of clusters to split the data into
- * @param fuzzyness the fuzzyness factor, must be > 1.0
+ * @param fuzziness the fuzziness factor, must be > 1.0
* @param maxIterations the maximum number of iterations to run the algorithm for.
* If negative, no maximum will be used.
* @param measure the distance measure to use
- * @throws NumberIsTooSmallException if {@code fuzzyness <= 1.0}
+ * @throws NumberIsTooSmallException if {@code fuzziness <= 1.0}
*/
- public FuzzyKMeansClusterer(final int k, final double fuzzyness,
+ public FuzzyKMeansClusterer(final int k, final double fuzziness,
final int maxIterations, final DistanceMeasure measure)
throws NumberIsTooSmallException {
- this(k, fuzzyness, maxIterations, measure, DEFAULT_EPSILON, new JDKRandomGenerator());
+ this(k, fuzziness, maxIterations, measure, DEFAULT_EPSILON, new JDKRandomGenerator());
}
/**
* Creates a new instance of a FuzzyKMeansClusterer.
*
* @param k the number of clusters to split the data into
- * @param fuzzyness the fuzzyness factor, must be > 1.0
+ * @param fuzziness the fuzziness factor, must be > 1.0
* @param maxIterations the maximum number of iterations to run the algorithm for.
* If negative, no maximum will be used.
* @param measure the distance measure to use
- * @param epsilon the convergence criteria
+ * @param epsilon the convergence criteria (default is 1e-3)
* @param random random generator to use for choosing initial centers
- * @throws NumberIsTooSmallException if {@code fuzzyness <= 1.0}
+ * @throws NumberIsTooSmallException if {@code fuzziness <= 1.0}
*/
- public FuzzyKMeansClusterer(final int k, final double fuzzyness,
+ public FuzzyKMeansClusterer(final int k, final double fuzziness,
final int maxIterations, final DistanceMeasure measure,
final double epsilon, final RandomGenerator random)
throws NumberIsTooSmallException {
super(measure);
- if (fuzzyness <= 1.0d) {
- throw new NumberIsTooSmallException(fuzzyness, 1.0, false);
+ if (fuzziness <= 1.0d) {
+ throw new NumberIsTooSmallException(fuzziness, 1.0, false);
}
this.k = k;
- this.fuzzyness = fuzzyness;
+ this.fuzziness = fuzziness;
this.maxIterations = maxIterations;
this.epsilon = epsilon;
this.random = random;
@@ -148,11 +166,11 @@ public class FuzzyKMeansClusterer<T exte
}
/**
- * Returns the fuzzyness factor used by this instance.
- * @return the fuzzyness factor
+ * Returns the fuzziness factor used by this instance.
+ * @return the fuzziness factor
*/
- public double getFuzzyness() {
- return fuzzyness;
+ public double getFuzziness() {
+ return fuzziness;
}
/**
@@ -164,6 +182,14 @@ public class FuzzyKMeansClusterer<T exte
}
/**
+ * Returns the convergence criteria used by this instance.
+ * @return the convergence criteria
+ */
+ public double getEpsilon() {
+ return epsilon;
+ }
+
+ /**
* Returns the random generator this instance will use.
* @return the random generator
*/
@@ -179,8 +205,12 @@ public class FuzzyKMeansClusterer<T exte
* to cluster {@code j}.
*
* @return the membership matrix
+ * @throws MathIllegalStateException if {@link #cluster(Collection)} has not been called before
*/
public RealMatrix getMembershipMatrix() {
+ if (membershipMatrix == null) {
+ throw new MathIllegalStateException();
+ }
return MatrixUtils.createRealMatrix(membershipMatrix);
}
@@ -205,12 +235,12 @@ public class FuzzyKMeansClusterer<T exte
/**
* Get the value of the objective function.
- * @return the objective function as double value, or {@code 0.0} if {@link #cluster(Collection)}
- * has not been called before.
+ * @return the objective function evaluation as double value
+ * @throws MathIllegalStateException if {@link #cluster(Collection)} has not been called before
*/
public double getObjectiveFunctionValue() {
if (points == null || clusters == null) {
- return 0;
+ throw new MathIllegalStateException();
}
int i = 0;
@@ -218,8 +248,8 @@ public class FuzzyKMeansClusterer<T exte
for (final T point : points) {
int j = 0;
for (final CentroidCluster<T> cluster : clusters) {
- double dist = distance(point, cluster.getCenter());
- objFunction += (dist * dist) * FastMath.pow(membershipMatrix[i][j], fuzzyness);
+ final double dist = distance(point, cluster.getCenter());
+ objFunction += (dist * dist) * FastMath.pow(membershipMatrix[i][j], fuzziness);
j++;
}
i++;
@@ -293,7 +323,7 @@ public class FuzzyKMeansClusterer<T exte
double[] arr = new double[center.getPoint().length];
double sum = 0.0;
for (final T point : points) {
- final double u = FastMath.pow(membershipMatrix[i][j], fuzzyness);
+ final double u = FastMath.pow(membershipMatrix[i][j], fuzziness);
final double[] pointArr = point.getPoint();
for (int idx = 0; idx < arr.length; idx++) {
arr[idx] += u * pointArr[idx];
@@ -324,7 +354,7 @@ public class FuzzyKMeansClusterer<T exte
for (final CentroidCluster<T> c : clusters) {
final double distB = FastMath.abs(distance(point, c.getCenter()));
- sum += FastMath.pow(distA / distB, 2.0 / (fuzzyness - 1.0));
+ sum += FastMath.pow(distA / distB, 2.0 / (fuzziness - 1.0));
}
membershipMatrix[i][j] = 1.0 / sum;
Modified: commons/proper/math/trunk/src/test/java/org/apache/commons/math3/ml/clustering/FuzzyKMeansClustererTest.java
URL: http://svn.apache.org/viewvc/commons/proper/math/trunk/src/test/java/org/apache/commons/math3/ml/clustering/FuzzyKMeansClustererTest.java?rev=1467797&r1=1467796&r2=1467797&view=diff
==============================================================================
--- commons/proper/math/trunk/src/test/java/org/apache/commons/math3/ml/clustering/FuzzyKMeansClustererTest.java (original)
+++ commons/proper/math/trunk/src/test/java/org/apache/commons/math3/ml/clustering/FuzzyKMeansClustererTest.java Sun Apr 14 16:09:18 2013
@@ -16,13 +16,19 @@
*/
package org.apache.commons.math3.ml.clustering;
+import static org.hamcrest.CoreMatchers.*;
+import static org.junit.Assert.*;
+
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import org.apache.commons.math3.exception.MathIllegalArgumentException;
import org.apache.commons.math3.exception.NullArgumentException;
-import org.junit.Assert;
+import org.apache.commons.math3.ml.distance.CanberraDistance;
+import org.apache.commons.math3.ml.distance.DistanceMeasure;
+import org.apache.commons.math3.random.JDKRandomGenerator;
+import org.apache.commons.math3.random.RandomGenerator;
import org.junit.Test;
public class FuzzyKMeansClustererTest {
@@ -52,7 +58,7 @@ public class FuzzyKMeansClustererTest {
boolean cluster1Found = false;
boolean cluster2Found = false;
boolean cluster3Found = false;
- Assert.assertEquals(3, clusters.size());
+ assertEquals(3, clusters.size());
for (final Cluster<DoublePoint> cluster : clusters) {
if (cluster.getPoints().containsAll(clusterOne)) {
cluster1Found = true;
@@ -64,9 +70,9 @@ public class FuzzyKMeansClustererTest {
cluster3Found = true;
}
}
- Assert.assertTrue(cluster1Found);
- Assert.assertTrue(cluster2Found);
- Assert.assertTrue(cluster3Found);
+ assertTrue(cluster1Found);
+ assertTrue(cluster2Found);
+ assertTrue(cluster3Found);
}
@Test(expected = MathIllegalArgumentException.class)
@@ -79,5 +85,20 @@ public class FuzzyKMeansClustererTest {
FuzzyKMeansClusterer<DoublePoint> clusterer = new FuzzyKMeansClusterer<DoublePoint>(3, 2.0);
clusterer.cluster(null);
}
+
+ @Test
+ public void testGetters() {
+ DistanceMeasure measure = new CanberraDistance();
+ RandomGenerator random = new JDKRandomGenerator();
+ FuzzyKMeansClusterer<DoublePoint> clusterer =
+ new FuzzyKMeansClusterer<DoublePoint>(3, 2.0, 100, measure, 1e-6, random);
+
+ assertEquals(3, clusterer.getK());
+ assertEquals(2.0, clusterer.getFuzziness(), 1e-6);
+ assertEquals(100, clusterer.getMaxIterations());
+ assertEquals(1e-6, clusterer.getEpsilon(), 1e-12);
+ assertThat(clusterer.getDistanceMeasure(), is(measure));
+ assertThat(clusterer.getRandomGenerator(), is(random));
+ }
}