You are viewing a plain-text version of this content. The canonical link to the original was a hyperlink and is not preserved in this text version.
Posted to commits@mahout.apache.org by gs...@apache.org on 2013/06/08 14:01:20 UTC

svn commit: r1490966 - in /mahout/trunk: CHANGELOG examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/kmeans/Job.java

Author: gsingers
Date: Sat Jun  8 12:01:19 2013
New Revision: 1490966

URL: http://svn.apache.org/r1490966
Log:
MAHOUT-1084: fix random seed issue in synthetic control k-means clustering

Modified:
    mahout/trunk/CHANGELOG
    mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/kmeans/Job.java

Modified: mahout/trunk/CHANGELOG
URL: http://svn.apache.org/viewvc/mahout/trunk/CHANGELOG?rev=1490966&r1=1490965&r2=1490966&view=diff
==============================================================================
--- mahout/trunk/CHANGELOG (original)
+++ mahout/trunk/CHANGELOG Sat Jun  8 12:01:19 2013
@@ -92,4 +92,6 @@ Release 0.8 - unreleased
 
   MAHOUT-944: Create SequenceFiles out of Lucene document storage (no term vectors required) (Frank Scholten, gsingers)
 
-  MAHOUT-958: Fix issue with globs in RepresentativePointsDriver (Adam Baron, Vikram Dixit K, ehgjr via gsingers)
\ No newline at end of file
+  MAHOUT-958: Fix issue with globs in RepresentativePointsDriver (Adam Baron, Vikram Dixit K, ehgjr via gsingers)
+
+  MAHOUT-1084: Fixed issue with too many clusters in synthetic control example (liutengfei, gsingers)
\ No newline at end of file

Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/kmeans/Job.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/kmeans/Job.java?rev=1490966&r1=1490965&r2=1490966&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/kmeans/Job.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/kmeans/Job.java Sat Jun  8 12:01:19 2013
@@ -129,14 +129,17 @@ public final class Job extends AbstractJ
     log.info("Preparing Input");
     InputDriver.runJob(input, directoryContainingConvertedInput, "org.apache.mahout.math.RandomAccessSparseVector");
     log.info("Running random seed to get initial clusters");
-    Path clusters = new Path(output, Cluster.INITIAL_CLUSTERS_DIR);
+    Path clusters = new Path(output, "random-seeds");
     clusters = RandomSeedGenerator.buildRandom(conf, directoryContainingConvertedInput, clusters, k, measure);
-    log.info("Running KMeans");
+    log.info("Running KMeans with k = {}", k);
     KMeansDriver.run(conf, directoryContainingConvertedInput, clusters, output, measure, convergenceDelta,
         maxIterations, true, 0.0, false);
     // run ClusterDumper
-    ClusterDumper clusterDumper = new ClusterDumper(new Path(output, "clusters-*-final"), new Path(output,
-        "clusteredPoints"));
+    Path outGlob = new Path(output, "clusters-*-final");
+    Path clusteredPoints = new Path(output,
+            "clusteredPoints");
+    log.info("Dumping out clusters from clusters: {} and clusteredPoints: {}", outGlob, clusteredPoints);
+    ClusterDumper clusterDumper = new ClusterDumper(outGlob, clusteredPoints);
     clusterDumper.printClusters(null);
   }