You are viewing a plain-text version of this content. The canonical link to the original message was not preserved in this plain-text copy.
Posted to commits@mahout.apache.org by gs...@apache.org on 2013/06/08 14:01:20 UTC
svn commit: r1490966 - in /mahout/trunk: CHANGELOG
examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/kmeans/Job.java
Author: gsingers
Date: Sat Jun 8 12:01:19 2013
New Revision: 1490966
URL: http://svn.apache.org/r1490966
Log:
MAHOUT-1084: fix random seed issue in synthetic control k-means clustering
Modified:
mahout/trunk/CHANGELOG
mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/kmeans/Job.java
Modified: mahout/trunk/CHANGELOG
URL: http://svn.apache.org/viewvc/mahout/trunk/CHANGELOG?rev=1490966&r1=1490965&r2=1490966&view=diff
==============================================================================
--- mahout/trunk/CHANGELOG (original)
+++ mahout/trunk/CHANGELOG Sat Jun 8 12:01:19 2013
@@ -92,4 +92,6 @@ Release 0.8 - unreleased
MAHOUT-944: Create SequenceFiles out of Lucene document storage (no term vectors required) (Frank Scholten, gsingers)
- MAHOUT-958: Fix issue with globs in RepresentativePointsDriver (Adam Baron, Vikram Dixit K, ehgjr via gsingers)
\ No newline at end of file
+ MAHOUT-958: Fix issue with globs in RepresentativePointsDriver (Adam Baron, Vikram Dixit K, ehgjr via gsingers)
+
+ MAHOUT-1084: Fixed issue with too many clusters in synthetic control example (liutengfei, gsingers)
\ No newline at end of file
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/kmeans/Job.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/kmeans/Job.java?rev=1490966&r1=1490965&r2=1490966&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/kmeans/Job.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/kmeans/Job.java Sat Jun 8 12:01:19 2013
@@ -129,14 +129,17 @@ public final class Job extends AbstractJ
log.info("Preparing Input");
InputDriver.runJob(input, directoryContainingConvertedInput, "org.apache.mahout.math.RandomAccessSparseVector");
log.info("Running random seed to get initial clusters");
- Path clusters = new Path(output, Cluster.INITIAL_CLUSTERS_DIR);
+ Path clusters = new Path(output, "random-seeds");
clusters = RandomSeedGenerator.buildRandom(conf, directoryContainingConvertedInput, clusters, k, measure);
- log.info("Running KMeans");
+ log.info("Running KMeans with k = {}", k);
KMeansDriver.run(conf, directoryContainingConvertedInput, clusters, output, measure, convergenceDelta,
maxIterations, true, 0.0, false);
// run ClusterDumper
- ClusterDumper clusterDumper = new ClusterDumper(new Path(output, "clusters-*-final"), new Path(output,
- "clusteredPoints"));
+ Path outGlob = new Path(output, "clusters-*-final");
+ Path clusteredPoints = new Path(output,
+ "clusteredPoints");
+ log.info("Dumping out clusters from clusters: {} and clusteredPoints: {}", outGlob, clusteredPoints);
+ ClusterDumper clusterDumper = new ClusterDumper(outGlob, clusteredPoints);
clusterDumper.printClusters(null);
}