You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by df...@apache.org on 2013/06/16 19:04:09 UTC
svn commit: r1493533 - in /mahout/trunk: CHANGELOG
core/src/main/java/org/apache/mahout/clustering/streaming/cluster/BallKMeans.java
Author: dfilimon
Date: Sun Jun 16 17:04:08 2013
New Revision: 1493533
URL: http://svn.apache.org/r1493533
Log:
MAHOUT-1255: Fix for weights in Multinomial sometimes overflowing in BallKMeans
Modified:
mahout/trunk/CHANGELOG
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/streaming/cluster/BallKMeans.java
Modified: mahout/trunk/CHANGELOG
URL: http://svn.apache.org/viewvc/mahout/trunk/CHANGELOG?rev=1493533&r1=1493532&r2=1493533&view=diff
==============================================================================
--- mahout/trunk/CHANGELOG (original)
+++ mahout/trunk/CHANGELOG Sun Jun 16 17:04:08 2013
@@ -2,6 +2,8 @@ Mahout Change Log
Release 0.8 - unreleased
+ MAHOUT-1255: Fix for weights in Multinomial sometimes overflowing in BallKMeans (dfilimon)
+
MAHOUT-1254: Final round of cleanup for StreamingKMeans (dfilimon)
MAHOUT-1263: Serialise/Deserialise Lambda value for OnlineLogisticRegression (Mike Davy via smarthi)
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/streaming/cluster/BallKMeans.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/streaming/cluster/BallKMeans.java?rev=1493533&r1=1493532&r2=1493533&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/streaming/cluster/BallKMeans.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/streaming/cluster/BallKMeans.java Sun Jun 16 17:04:08 2013
@@ -281,11 +281,11 @@ public class BallKMeans implements Itera
* @param datapoints The datapoints to select from. These datapoints should be WeightedVectors of some kind.
*/
private void initializeSeedsKMeansPlusPlus(List<? extends WeightedVector> datapoints) {
- Preconditions.checkArgument(datapoints.size() > 1, "Must have at least two datapoints points to cluster "
- + "sensibly");
+ Preconditions.checkArgument(datapoints.size() > 1, "Must have at least two datapoints points to cluster " +
+ "sensibly");
Preconditions.checkArgument(datapoints.size() >= numClusters,
String.format("Must have more datapoints [%d] than clusters [%d]", datapoints.size(), numClusters));
- // Compute the centroid of all of the datapoints. This is then used to compute the squared radius of the datapoints.
+ // Compute the centroid of all of the datapoints. This is then used to compute the squared radius of the datapoints.
Centroid center = new Centroid(datapoints.iterator().next());
for (WeightedVector row : Iterables.skip(datapoints, 1)) {
center.update(row);
@@ -330,7 +330,7 @@ public class BallKMeans implements Itera
// set to the squared distance from c_1
for (int i = 0; i < datapoints.size(); ++i) {
WeightedVector row = datapoints.get(i);
- final double w = distanceMeasure.distance(c_1, row) * row.getWeight();
+ final double w = distanceMeasure.distance(c_1, row) * 2 * Math.log(1 + row.getWeight());
seedSelector.set(i, w);
}
@@ -446,8 +446,8 @@ public class BallKMeans implements Itera
return Iterators.transform(centroids.iterator(), new Function<Vector, Centroid>() {
@Override
public Centroid apply(Vector input) {
- Preconditions.checkArgument(input instanceof Centroid, "Non-centroid in centroids "
- + "searcher");
+ Preconditions.checkArgument(input instanceof Centroid, "Non-centroid in centroids " +
+ "searcher");
//noinspection ConstantConditions
return (Centroid)input;
}