You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by sm...@apache.org on 2013/11/18 08:50:29 UTC
svn commit: r1542944 - in /mahout/trunk: CHANGELOG
core/src/main/java/org/apache/mahout/clustering/streaming/mapreduce/StreamingKMeansReducer.java
Author: smarthi
Date: Mon Nov 18 07:50:29 2013
New Revision: 1542944
URL: http://svn.apache.org/r1542944
Log:
MAHOUT-1314: StreamingKMeansReducer throws NullPointerException when REDUCE_STREAMING_KMEANS is set to true
Modified:
mahout/trunk/CHANGELOG
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/streaming/mapreduce/StreamingKMeansReducer.java
Modified: mahout/trunk/CHANGELOG
URL: http://svn.apache.org/viewvc/mahout/trunk/CHANGELOG?rev=1542944&r1=1542943&r2=1542944&view=diff
==============================================================================
--- mahout/trunk/CHANGELOG (original)
+++ mahout/trunk/CHANGELOG Mon Nov 18 07:50:29 2013
@@ -10,6 +10,8 @@ Release 0.9 - unreleased
MAHOUT-1333: Fixed examples bin directory permissions in distribution archives (Mike Percy via sslavic)
+ MAHOUT-1314: StreamingKMeansReducer throws NullPointerException when REDUCE_STREAMING_KMEANS is set to true (smarthi)
+
MAHOUT-1313: Fixed unwanted integral division bug in RowSimilarityJob downsampling code where precision should have been retained (sslavic)
MAHOUT-1301: toString() method of SequentialAccessSparseVector has excess comma at the end (Alexander Senov, smarthi)
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/streaming/mapreduce/StreamingKMeansReducer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/streaming/mapreduce/StreamingKMeansReducer.java?rev=1542944&r1=1542943&r2=1542944&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/streaming/mapreduce/StreamingKMeansReducer.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/streaming/mapreduce/StreamingKMeansReducer.java Mon Nov 18 07:50:29 2013
@@ -31,8 +31,13 @@ import org.apache.mahout.clustering.stre
import org.apache.mahout.common.commandline.DefaultOptionCreator;
import org.apache.mahout.math.Centroid;
import org.apache.mahout.math.Vector;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
public class StreamingKMeansReducer extends Reducer<IntWritable, CentroidWritable, IntWritable, CentroidWritable> {
+
+ private static final Logger log = LoggerFactory.getLogger(StreamingKMeansReducer.class);
+
/**
* Configuration for the MapReduce job.
*/
@@ -57,7 +62,7 @@ public class StreamingKMeansReducer exte
@Override
public Centroid apply(CentroidWritable input) {
Preconditions.checkNotNull(input);
- return input.getCentroid();
+ return input.getCentroid().clone();
}
}), conf).call());
} else {
@@ -66,7 +71,7 @@ public class StreamingKMeansReducer exte
int index = 0;
for (Vector centroid : getBestCentroids(intermediateCentroids, conf)) {
- context.write(new IntWritable(index), new CentroidWritable((Centroid)centroid));
+ context.write(new IntWritable(index), new CentroidWritable((Centroid) centroid));
++index;
}
}
@@ -84,6 +89,11 @@ public class StreamingKMeansReducer exte
}
public static Iterable<Vector> getBestCentroids(List<Centroid> centroids, Configuration conf) {
+
+ if (log.isInfoEnabled()) {
+ log.info("Number of Centroids: {}", centroids.size());
+ }
+
int numClusters = conf.getInt(DefaultOptionCreator.NUM_CLUSTERS_OPTION, 1);
int maxNumIterations = conf.getInt(StreamingKMeansDriver.MAX_NUM_ITERATIONS, 10);
float trimFraction = conf.getFloat(StreamingKMeansDriver.TRIM_FRACTION, 0.9f);
@@ -92,8 +102,8 @@ public class StreamingKMeansReducer exte
float testProbability = conf.getFloat(StreamingKMeansDriver.TEST_PROBABILITY, 0.1f);
int numRuns = conf.getInt(StreamingKMeansDriver.NUM_BALLKMEANS_RUNS, 3);
- BallKMeans clusterer = new BallKMeans(StreamingKMeansUtilsMR.searcherFromConfiguration(conf),
+ BallKMeans ballKMeansCluster = new BallKMeans(StreamingKMeansUtilsMR.searcherFromConfiguration(conf),
numClusters, maxNumIterations, trimFraction, kMeansPlusPlusInit, correctWeights, testProbability, numRuns);
- return clusterer.cluster(centroids);
+ return ballKMeansCluster.cluster(centroids);
}
}