You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by sm...@apache.org on 2013/11/18 08:50:29 UTC

svn commit: r1542944 - in /mahout/trunk: CHANGELOG core/src/main/java/org/apache/mahout/clustering/streaming/mapreduce/StreamingKMeansReducer.java

Author: smarthi
Date: Mon Nov 18 07:50:29 2013
New Revision: 1542944

URL: http://svn.apache.org/r1542944
Log:
MAHOUT-1314: StreamingKMeansReducer throws NullPointerException when REDUCE_STREAMING_KMEANS is set to true

Modified:
    mahout/trunk/CHANGELOG
    mahout/trunk/core/src/main/java/org/apache/mahout/clustering/streaming/mapreduce/StreamingKMeansReducer.java

Modified: mahout/trunk/CHANGELOG
URL: http://svn.apache.org/viewvc/mahout/trunk/CHANGELOG?rev=1542944&r1=1542943&r2=1542944&view=diff
==============================================================================
--- mahout/trunk/CHANGELOG (original)
+++ mahout/trunk/CHANGELOG Mon Nov 18 07:50:29 2013
@@ -10,6 +10,8 @@ Release 0.9 - unreleased
 
   MAHOUT-1333: Fixed examples bin directory permissions in distribution archives (Mike Percy via sslavic)
 
+  MAHOUT-1314: StreamingKMeansReducer throws NullPointerException when REDUCE_STREAMING_KMEANS is set to true (smarthi)
+
   MAHOUT-1313: Fixed unwanted integral division bug in RowSimilarityJob downsampling code where precision should have been retained (sslavic) 
 
   MAHOUT-1301: toString() method of SequentialAccessSparseVector has excess comma at the end (Alexander Senov, smarthi)

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/streaming/mapreduce/StreamingKMeansReducer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/streaming/mapreduce/StreamingKMeansReducer.java?rev=1542944&r1=1542943&r2=1542944&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/streaming/mapreduce/StreamingKMeansReducer.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/streaming/mapreduce/StreamingKMeansReducer.java Mon Nov 18 07:50:29 2013
@@ -31,8 +31,13 @@ import org.apache.mahout.clustering.stre
 import org.apache.mahout.common.commandline.DefaultOptionCreator;
 import org.apache.mahout.math.Centroid;
 import org.apache.mahout.math.Vector;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 public class StreamingKMeansReducer extends Reducer<IntWritable, CentroidWritable, IntWritable, CentroidWritable> {
+
+  private static final Logger log = LoggerFactory.getLogger(StreamingKMeansReducer.class);
+
   /**
    * Configuration for the MapReduce job.
    */
@@ -57,7 +62,7 @@ public class StreamingKMeansReducer exte
             @Override
             public Centroid apply(CentroidWritable input) {
               Preconditions.checkNotNull(input);
-              return input.getCentroid();
+              return input.getCentroid().clone();
             }
           }), conf).call());
     } else {
@@ -66,7 +71,7 @@ public class StreamingKMeansReducer exte
 
     int index = 0;
     for (Vector centroid : getBestCentroids(intermediateCentroids, conf)) {
-      context.write(new IntWritable(index), new CentroidWritable((Centroid)centroid));
+      context.write(new IntWritable(index), new CentroidWritable((Centroid) centroid));
       ++index;
     }
   }
@@ -84,6 +89,11 @@ public class StreamingKMeansReducer exte
   }
 
   public static Iterable<Vector> getBestCentroids(List<Centroid> centroids, Configuration conf) {
+
+    if (log.isInfoEnabled()) {
+      log.info("Number of Centroids: {}", centroids.size());
+    }
+
     int numClusters = conf.getInt(DefaultOptionCreator.NUM_CLUSTERS_OPTION, 1);
     int maxNumIterations = conf.getInt(StreamingKMeansDriver.MAX_NUM_ITERATIONS, 10);
     float trimFraction = conf.getFloat(StreamingKMeansDriver.TRIM_FRACTION, 0.9f);
@@ -92,8 +102,8 @@ public class StreamingKMeansReducer exte
     float testProbability = conf.getFloat(StreamingKMeansDriver.TEST_PROBABILITY, 0.1f);
     int numRuns = conf.getInt(StreamingKMeansDriver.NUM_BALLKMEANS_RUNS, 3);
 
-    BallKMeans clusterer = new BallKMeans(StreamingKMeansUtilsMR.searcherFromConfiguration(conf),
+    BallKMeans ballKMeansCluster = new BallKMeans(StreamingKMeansUtilsMR.searcherFromConfiguration(conf),
         numClusters, maxNumIterations, trimFraction, kMeansPlusPlusInit, correctWeights, testProbability, numRuns);
-    return clusterer.cluster(centroids);
+    return ballKMeansCluster.cluster(centroids);
   }
 }