You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by sm...@apache.org on 2014/01/28 09:07:35 UTC
svn commit: r1561975 - in /mahout/trunk:
core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/
core/src/main/java/org/apache/mahout/clustering/kmeans/
core/src/main/java/org/apache/mahout/clustering/spectral/kmeans/
core/src/test/java/org/apache...
Author: smarthi
Date: Tue Jan 28 08:07:34 2014
New Revision: 1561975
URL: http://svn.apache.org/r1561975
Log:
MAHOUT-1310: Changed method signatures to remove unused DistanceMeasure parameter.
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansDriver.java
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansDriver.java
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/kmeans/SpectralKMeansDriver.java
mahout/trunk/core/src/test/java/org/apache/mahout/clustering/kmeans/TestKmeansClustering.java
mahout/trunk/core/src/test/java/org/apache/mahout/clustering/topdown/postprocessor/ClusterCountReaderTest.java
mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayFuzzyKMeans.java
mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayKMeans.java
mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/fuzzykmeans/Job.java
mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/kmeans/Job.java
mahout/trunk/integration/src/test/java/org/apache/mahout/clustering/TestClusterDumper.java
mahout/trunk/integration/src/test/java/org/apache/mahout/clustering/TestClusterEvaluator.java
mahout/trunk/integration/src/test/java/org/apache/mahout/clustering/cdbw/TestCDbwEvaluator.java
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansDriver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansDriver.java?rev=1561975&r1=1561974&r2=1561975&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansDriver.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansDriver.java Tue Jan 28 08:07:34 2014
@@ -110,7 +110,6 @@ public class FuzzyKMeansDriver extends A
input,
clusters,
output,
- measure,
convergenceDelta,
maxIterations,
fuzziness,
@@ -124,32 +123,31 @@ public class FuzzyKMeansDriver extends A
/**
* Iterate over the input vectors to produce clusters and, if requested, use the
* results of the final iteration to cluster the input vectors.
- *
+ *
* @param input
* the directory pathname for input points
* @param clustersIn
* the directory pathname for initial & computed clusters
* @param output
- * the directory pathname for output points
+ * the directory pathname for output points
* @param convergenceDelta
- * the convergence delta value
+* the convergence delta value
* @param maxIterations
- * the maximum number of iterations
+* the maximum number of iterations
* @param m
- * the fuzzification factor, see
- * http://en.wikipedia.org/wiki/Data_clustering#Fuzzy_c-means_clustering
- * @param runClustering
- * true if points are to be clustered after iterations complete
+* the fuzzification factor, see
+* http://en.wikipedia.org/wiki/Data_clustering#Fuzzy_c-means_clustering
+ * @param runClustering
+* true if points are to be clustered after iterations complete
* @param emitMostLikely
- * a boolean if true emit only most likely cluster for each point
- * @param threshold
- * a double threshold value emits all clusters having greater pdf (emitMostLikely = false)
+* a boolean if true emit only most likely cluster for each point
+ * @param threshold
+* a double threshold value emits all clusters having greater pdf (emitMostLikely = false)
* @param runSequential if true run in sequential execution mode
*/
public static void run(Path input,
Path clustersIn,
Path output,
- DistanceMeasure measure,
double convergenceDelta,
int maxIterations,
float m,
@@ -162,7 +160,6 @@ public class FuzzyKMeansDriver extends A
input,
clustersIn,
output,
- measure,
convergenceDelta,
maxIterations,
m,
@@ -172,7 +169,6 @@ public class FuzzyKMeansDriver extends A
clusterData(conf, input,
clustersOut,
output,
- measure,
convergenceDelta,
m,
emitMostLikely,
@@ -189,27 +185,26 @@ public class FuzzyKMeansDriver extends A
* @param clustersIn
* the directory pathname for initial & computed clusters
* @param output
- * the directory pathname for output points
+ * the directory pathname for output points
* @param convergenceDelta
- * the convergence delta value
+* the convergence delta value
* @param maxIterations
- * the maximum number of iterations
+* the maximum number of iterations
* @param m
- * the fuzzification factor, see
- * http://en.wikipedia.org/wiki/Data_clustering#Fuzzy_c-means_clustering
- * @param runClustering
- * true if points are to be clustered after iterations complete
+* the fuzzification factor, see
+* http://en.wikipedia.org/wiki/Data_clustering#Fuzzy_c-means_clustering
+ * @param runClustering
+* true if points are to be clustered after iterations complete
* @param emitMostLikely
- * a boolean if true emit only most likely cluster for each point
- * @param threshold
- * a double threshold value emits all clusters having greater pdf (emitMostLikely = false)
+* a boolean if true emit only most likely cluster for each point
+ * @param threshold
+* a double threshold value emits all clusters having greater pdf (emitMostLikely = false)
* @param runSequential if true run in sequential execution mode
*/
public static void run(Configuration conf,
Path input,
Path clustersIn,
Path output,
- DistanceMeasure measure,
double convergenceDelta,
int maxIterations,
float m,
@@ -219,14 +214,13 @@ public class FuzzyKMeansDriver extends A
boolean runSequential)
throws IOException, ClassNotFoundException, InterruptedException {
Path clustersOut =
- buildClusters(conf, input, clustersIn, output, measure, convergenceDelta, maxIterations, m, runSequential);
+ buildClusters(conf, input, clustersIn, output, convergenceDelta, maxIterations, m, runSequential);
if (runClustering) {
log.info("Clustering");
clusterData(conf,
input,
clustersOut,
output,
- measure,
convergenceDelta,
m,
emitMostLikely,
@@ -237,14 +231,13 @@ public class FuzzyKMeansDriver extends A
/**
* Iterate over the input vectors to produce cluster directories for each iteration
+ *
* @param input
* the directory pathname for input points
* @param clustersIn
* the file pathname for initial cluster centers
* @param output
* the directory pathname for output points
- * @param measure
- * the classname of the DistanceMeasure
* @param convergenceDelta
* the convergence delta value
* @param maxIterations
@@ -253,14 +246,13 @@ public class FuzzyKMeansDriver extends A
* the fuzzification factor, see
* http://en.wikipedia.org/wiki/Data_clustering#Fuzzy_c-means_clustering
* @param runSequential if true run in sequential execution mode
- *
+ *
* @return the Path of the final clusters directory
*/
public static Path buildClusters(Configuration conf,
Path input,
Path clustersIn,
Path output,
- DistanceMeasure measure,
double convergenceDelta,
int maxIterations,
float m,
@@ -293,28 +285,25 @@ public class FuzzyKMeansDriver extends A
/**
* Run the job using supplied arguments
- *
+ *
* @param input
* the directory pathname for input points
* @param clustersIn
* the directory pathname for input clusters
* @param output
- * the directory pathname for output points
- * @param measure
- * the classname of the DistanceMeasure
+ * the directory pathname for output points
* @param convergenceDelta
- * the convergence delta value
+* the convergence delta value
* @param emitMostLikely
- * a boolean if true emit only most likely cluster for each point
+* a boolean if true emit only most likely cluster for each point
* @param threshold
- * a double threshold value emits all clusters having greater pdf (emitMostLikely = false)
+* a double threshold value emits all clusters having greater pdf (emitMostLikely = false)
* @param runSequential if true run in sequential execution mode
*/
public static void clusterData(Configuration conf,
Path input,
Path clustersIn,
Path output,
- DistanceMeasure measure,
double convergenceDelta,
float m,
boolean emitMostLikely,
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansDriver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansDriver.java?rev=1561975&r1=1561974&r2=1561975&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansDriver.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansDriver.java Tue Jan 28 08:07:34 2014
@@ -100,7 +100,7 @@ public class KMeansDriver extends Abstra
if (hasOption(DefaultOptionCreator.OUTLIER_THRESHOLD)) {
clusterClassificationThreshold = Double.parseDouble(getOption(DefaultOptionCreator.OUTLIER_THRESHOLD));
}
- run(getConf(), input, clusters, output, measure, convergenceDelta, maxIterations, runClustering,
+ run(getConf(), input, clusters, output, convergenceDelta, maxIterations, runClustering,
clusterClassificationThreshold, runSequential);
return 0;
}
@@ -108,15 +108,13 @@ public class KMeansDriver extends Abstra
/**
* Iterate over the input vectors to produce clusters and, if requested, use the results of the final iteration to
* cluster the input vectors.
- *
+ *
* @param input
* the directory pathname for input points
* @param clustersIn
* the directory pathname for initial & computed clusters
* @param output
* the directory pathname for output points
- * @param measure
- * the DistanceMeasure to use
* @param convergenceDelta
* the convergence delta value
* @param maxIterations
@@ -129,36 +127,33 @@ public class KMeansDriver extends Abstra
* @param runSequential
* if true execute sequential algorithm
*/
- public static void run(Configuration conf, Path input, Path clustersIn, Path output, DistanceMeasure measure,
- double convergenceDelta, int maxIterations, boolean runClustering, double clusterClassificationThreshold,
- boolean runSequential) throws IOException, InterruptedException, ClassNotFoundException {
+ public static void run(Configuration conf, Path input, Path clustersIn, Path output,
+ double convergenceDelta, int maxIterations, boolean runClustering, double clusterClassificationThreshold,
+ boolean runSequential) throws IOException, InterruptedException, ClassNotFoundException {
// iterate until the clusters converge
String delta = Double.toString(convergenceDelta);
if (log.isInfoEnabled()) {
- log.info("Input: {} Clusters In: {} Out: {} Distance: {}", input, clustersIn, output,
- measure.getClass().getName());
+ log.info("Input: {} Clusters In: {} Out: {}", input, clustersIn, output);
log.info("convergence: {} max Iterations: {}", convergenceDelta, maxIterations);
}
- Path clustersOut = buildClusters(conf, input, clustersIn, output, measure, maxIterations, delta, runSequential);
+ Path clustersOut = buildClusters(conf, input, clustersIn, output, maxIterations, delta, runSequential);
if (runClustering) {
log.info("Clustering data");
- clusterData(conf, input, clustersOut, output, measure, clusterClassificationThreshold, runSequential);
+ clusterData(conf, input, clustersOut, output, clusterClassificationThreshold, runSequential);
}
}
/**
* Iterate over the input vectors to produce clusters and, if requested, use the results of the final iteration to
* cluster the input vectors.
- *
+ *
* @param input
* the directory pathname for input points
* @param clustersIn
* the directory pathname for initial & computed clusters
* @param output
* the directory pathname for output points
- * @param measure
- * the DistanceMeasure to use
* @param convergenceDelta
* the convergence delta value
* @param maxIterations
@@ -166,21 +161,22 @@ public class KMeansDriver extends Abstra
* @param runClustering
* true if points are to be clustered after iterations are completed
* @param clusterClassificationThreshold
- * Is a clustering strictness / outlier removal parrameter. Its value should be between 0 and 1. Vectors
+ * Is a clustering strictness / outlier removal parameter. Its value should be between 0 and 1. Vectors
* having pdf below this value will not be clustered.
* @param runSequential
* if true execute sequential algorithm
*/
- public static void run(Path input, Path clustersIn, Path output, DistanceMeasure measure, double convergenceDelta,
- int maxIterations, boolean runClustering, double clusterClassificationThreshold, boolean runSequential)
+ public static void run(Path input, Path clustersIn, Path output, double convergenceDelta,
+ int maxIterations, boolean runClustering, double clusterClassificationThreshold, boolean runSequential)
throws IOException, InterruptedException, ClassNotFoundException {
- run(new Configuration(), input, clustersIn, output, measure, convergenceDelta, maxIterations, runClustering,
+ run(new Configuration(), input, clustersIn, output, convergenceDelta, maxIterations, runClustering,
clusterClassificationThreshold, runSequential);
}
/**
* Iterate over the input vectors to produce cluster directories for each iteration
*
+ *
* @param conf
* the Configuration to use
* @param input
@@ -189,20 +185,18 @@ public class KMeansDriver extends Abstra
* the directory pathname for initial & computed clusters
* @param output
* the directory pathname for output points
- * @param measure
- * the classname of the DistanceMeasure
* @param maxIterations
* the maximum number of iterations
* @param delta
* the convergence delta value
* @param runSequential
* if true execute sequential algorithm
- *
+ *
* @return the Path of the final clusters directory
*/
public static Path buildClusters(Configuration conf, Path input, Path clustersIn, Path output,
- DistanceMeasure measure, int maxIterations, String delta, boolean runSequential) throws IOException,
- InterruptedException, ClassNotFoundException {
+ int maxIterations, String delta, boolean runSequential) throws IOException,
+ InterruptedException, ClassNotFoundException {
double convergenceDelta = Double.parseDouble(delta);
List<Cluster> clusters = Lists.newArrayList();
@@ -227,28 +221,26 @@ public class KMeansDriver extends Abstra
/**
* Run the job using supplied arguments
- *
+ *
* @param input
* the directory pathname for input points
* @param clustersIn
* the directory pathname for input clusters
* @param output
* the directory pathname for output points
- * @param measure
- * the classname of the DistanceMeasure
* @param clusterClassificationThreshold
- * Is a clustering strictness / outlier removal parrameter. Its value should be between 0 and 1. Vectors
+ * Is a clustering strictness / outlier removal parameter. Its value should be between 0 and 1. Vectors
* having pdf below this value will not be clustered.
* @param runSequential
* if true execute sequential algorithm
*/
- public static void clusterData(Configuration conf, Path input, Path clustersIn, Path output, DistanceMeasure measure,
- double clusterClassificationThreshold, boolean runSequential) throws IOException, InterruptedException,
- ClassNotFoundException {
+ public static void clusterData(Configuration conf, Path input, Path clustersIn, Path output,
+ double clusterClassificationThreshold, boolean runSequential) throws IOException, InterruptedException,
+ ClassNotFoundException {
if (log.isInfoEnabled()) {
log.info("Running Clustering");
- log.info("Input: {} Clusters In: {} Out: {} Distance: {}", input, clustersIn, output, measure);
+ log.info("Input: {} Clusters In: {} Out: {}", input, clustersIn, output);
}
ClusterClassifier.writePolicy(new KMeansClusteringPolicy(), clustersIn);
ClusterClassificationDriver.run(conf, input, output, new Path(output, PathDirectory.CLUSTERED_POINTS_DIRECTORY),
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/kmeans/SpectralKMeansDriver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/kmeans/SpectralKMeansDriver.java?rev=1561975&r1=1561974&r2=1561975&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/kmeans/SpectralKMeansDriver.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/kmeans/SpectralKMeansDriver.java Tue Jan 28 08:07:34 2014
@@ -152,6 +152,7 @@ public class SpectralKMeansDriver extend
* @param ssvd
* Flag to indicate the eigensolver to use
* @param numReducers
+ * Number of reducers
* @param blockHeight
* @param oversampling
* @param poweriters
@@ -244,7 +245,7 @@ public class SpectralKMeansDriver extend
// Run the KMeansDriver
Path answer = new Path(output, "kmeans_out");
- KMeansDriver.run(conf, data, initialclusters, answer, measure, convergenceDelta, maxIterations, true, 0.0, false);
+ KMeansDriver.run(conf, data, initialclusters, answer, convergenceDelta, maxIterations, true, 0.0, false);
// Restore name to id mapping and read through the cluster assignments
Path mappingPath = new Path(new Path(conf.get("hadoop.tmp.dir")), "generic_input_mapping");
Modified: mahout/trunk/core/src/test/java/org/apache/mahout/clustering/kmeans/TestKmeansClustering.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/kmeans/TestKmeansClustering.java?rev=1561975&r1=1561974&r2=1561975&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/clustering/kmeans/TestKmeansClustering.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/clustering/kmeans/TestKmeansClustering.java Tue Jan 28 08:07:34 2014
@@ -349,8 +349,8 @@ public final class TestKmeansClustering
// now run the KMeans job
Path kmeansOutput = new Path(outputPath, "kmeans");
- KMeansDriver.run(getConfiguration(), pointsPath, new Path(outputPath, "clusters-0-final"), kmeansOutput, new EuclideanDistanceMeasure(),
- 0.001, 10, true, 0.0, false);
+ KMeansDriver.run(getConfiguration(), pointsPath, new Path(outputPath, "clusters-0-final"), kmeansOutput,
+ 0.001, 10, true, 0.0, false);
// now compare the expected clusters with actual
Path clusteredPointsPath = new Path(kmeansOutput, "clusteredPoints");
Modified: mahout/trunk/core/src/test/java/org/apache/mahout/clustering/topdown/postprocessor/ClusterCountReaderTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/topdown/postprocessor/ClusterCountReaderTest.java?rev=1561975&r1=1561974&r2=1561975&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/clustering/topdown/postprocessor/ClusterCountReaderTest.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/clustering/topdown/postprocessor/ClusterCountReaderTest.java Tue Jan 28 08:07:34 2014
@@ -101,7 +101,7 @@ public final class ClusterCountReaderTes
CanopyDriver.run(conf, pointsPath, outputPathForCanopy, measure, 4.0, 3.0, true, 0.0, true);
Path clustersIn = new Path(outputPathForCanopy, new Path(Cluster.CLUSTERS_DIR + '0'
+ Cluster.FINAL_ITERATION_SUFFIX));
- KMeansDriver.run(conf, pointsPath, clustersIn, outputPathForKMeans, measure, 1, 1, true, 0.0, true);
+ KMeansDriver.run(conf, pointsPath, clustersIn, outputPathForKMeans, 1, 1, true, 0.0, true);
}
private static void verifyThatNumberOfClustersIsCorrect(Configuration conf, Path clusteredPointsPath) {
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayFuzzyKMeans.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayFuzzyKMeans.java?rev=1561975&r1=1561974&r2=1561975&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayFuzzyKMeans.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayFuzzyKMeans.java Tue Jan 28 08:07:34 2014
@@ -102,7 +102,7 @@ public class DisplayFuzzyKMeans extends
ClassNotFoundException, InterruptedException {
Path clustersIn = new Path(output, "random-seeds");
RandomSeedGenerator.buildRandom(conf, samples, clustersIn, 3, measure);
- FuzzyKMeansDriver.run(samples, clustersIn, output, measure, threshold, maxIterations, m, true, true, threshold,
+ FuzzyKMeansDriver.run(samples, clustersIn, output, threshold, maxIterations, m, true, true, threshold,
true);
loadClustersWritable(output);
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayKMeans.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayKMeans.java?rev=1561975&r1=1561974&r2=1561975&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayKMeans.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayKMeans.java Tue Jan 28 08:07:34 2014
@@ -93,7 +93,7 @@ public class DisplayKMeans extends Displ
throws IOException, InterruptedException, ClassNotFoundException {
Path clustersIn = new Path(output, "random-seeds");
RandomSeedGenerator.buildRandom(conf, samples, clustersIn, numClusters, measure);
- KMeansDriver.run(samples, clustersIn, output, measure, convergenceDelta, maxIterations, true, 0.0, true);
+ KMeansDriver.run(samples, clustersIn, output, convergenceDelta, maxIterations, true, 0.0, true);
loadClustersWritable(output);
}
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/fuzzykmeans/Job.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/fuzzykmeans/Job.java?rev=1561975&r1=1561974&r2=1561975&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/fuzzykmeans/Job.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/fuzzykmeans/Job.java Tue Jan 28 08:07:34 2014
@@ -133,14 +133,12 @@ public final class Job extends AbstractJ
InputDriver.runJob(input, directoryContainingConvertedInput, "org.apache.mahout.math.RandomAccessSparseVector");
log.info("Running Canopy to get initial clusters");
Path canopyOutput = new Path(output, "canopies");
- CanopyDriver
- .run(new Configuration(), directoryContainingConvertedInput, canopyOutput, measure, t1, t2, false, 0.0, false);
+ CanopyDriver.run(new Configuration(), directoryContainingConvertedInput, canopyOutput, measure, t1, t2, false, 0.0, false);
log.info("Running FuzzyKMeans");
FuzzyKMeansDriver.run(directoryContainingConvertedInput, new Path(canopyOutput, "clusters-0-final"), output,
- measure, convergenceDelta, maxIterations, fuzziness, true, true, 0.0, false);
+ convergenceDelta, maxIterations, fuzziness, true, true, 0.0, false);
// run ClusterDumper
- ClusterDumper clusterDumper = new ClusterDumper(new Path(output, "clusters-*-final"), new Path(output,
- "clusteredPoints"));
+ ClusterDumper clusterDumper = new ClusterDumper(new Path(output, "clusters-*-final"), new Path(output, "clusteredPoints"));
clusterDumper.printClusters(null);
}
}
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/kmeans/Job.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/kmeans/Job.java?rev=1561975&r1=1561974&r2=1561975&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/kmeans/Job.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/kmeans/Job.java Tue Jan 28 08:07:34 2014
@@ -132,12 +132,11 @@ public final class Job extends AbstractJ
Path clusters = new Path(output, "random-seeds");
clusters = RandomSeedGenerator.buildRandom(conf, directoryContainingConvertedInput, clusters, k, measure);
log.info("Running KMeans with k = {}", k);
- KMeansDriver.run(conf, directoryContainingConvertedInput, clusters, output, measure, convergenceDelta,
+ KMeansDriver.run(conf, directoryContainingConvertedInput, clusters, output, convergenceDelta,
maxIterations, true, 0.0, false);
// run ClusterDumper
Path outGlob = new Path(output, "clusters-*-final");
- Path clusteredPoints = new Path(output,
- "clusteredPoints");
+ Path clusteredPoints = new Path(output,"clusteredPoints");
log.info("Dumping out clusters from clusters: {} and clusteredPoints: {}", outGlob, clusteredPoints);
ClusterDumper clusterDumper = new ClusterDumper(outGlob, clusteredPoints);
clusterDumper.printClusters(null);
@@ -179,7 +178,7 @@ public final class Job extends AbstractJ
false);
log.info("Running KMeans");
KMeansDriver.run(conf, directoryContainingConvertedInput, new Path(canopyOutput, Cluster.INITIAL_CLUSTERS_DIR
- + "-final"), output, measure, convergenceDelta, maxIterations, true, 0.0, false);
+ + "-final"), output, convergenceDelta, maxIterations, true, 0.0, false);
// run ClusterDumper
ClusterDumper clusterDumper = new ClusterDumper(new Path(output, "clusters-*-final"), new Path(output,
"clusteredPoints"));
Modified: mahout/trunk/integration/src/test/java/org/apache/mahout/clustering/TestClusterDumper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/test/java/org/apache/mahout/clustering/TestClusterDumper.java?rev=1561975&r1=1561974&r2=1561975&view=diff
==============================================================================
--- mahout/trunk/integration/src/test/java/org/apache/mahout/clustering/TestClusterDumper.java (original)
+++ mahout/trunk/integration/src/test/java/org/apache/mahout/clustering/TestClusterDumper.java Tue Jan 28 08:07:34 2014
@@ -201,7 +201,7 @@ public final class TestClusterDumper ext
// now run the KMeans job
Path kMeansOutput = new Path(output, "kmeans");
KMeansDriver.run(conf, getTestTempDirPath("testdata"), new Path(output,
- "clusters-0-final"), kMeansOutput, measure, 0.001, 10, true, 0.0, false);
+ "clusters-0-final"), kMeansOutput, 0.001, 10, true, 0.0, false);
// run ClusterDumper
ClusterDumper clusterDumper = new ClusterDumper(finalClusterPath(conf,
output, 10), new Path(kMeansOutput, "clusteredPoints"));
@@ -219,7 +219,7 @@ public final class TestClusterDumper ext
// now run the KMeans job
Path kmeansOutput = new Path(output, "kmeans");
KMeansDriver.run(conf, getTestTempDirPath("testdata"), new Path(output,
- "clusters-0-final"), kmeansOutput, measure, 0.001, 10, true, 0.0, false);
+ "clusters-0-final"), kmeansOutput, 0.001, 10, true, 0.0, false);
// run ClusterDumper
ClusterDumper clusterDumper = new ClusterDumper(finalClusterPath(conf,
output, 10), new Path(kmeansOutput, "clusteredPoints"));
@@ -238,7 +238,7 @@ public final class TestClusterDumper ext
// now run the Fuzzy KMeans job
Path kMeansOutput = new Path(output, "kmeans");
FuzzyKMeansDriver.run(conf, getTestTempDirPath("testdata"), new Path(
- output, "clusters-0-final"), kMeansOutput, measure, 0.001, 10, 1.1f, true,
+ output, "clusters-0-final"), kMeansOutput, 0.001, 10, 1.1f, true,
true, 0, true);
// run ClusterDumper
ClusterDumper clusterDumper = new ClusterDumper(finalClusterPath(conf,
Modified: mahout/trunk/integration/src/test/java/org/apache/mahout/clustering/TestClusterEvaluator.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/test/java/org/apache/mahout/clustering/TestClusterEvaluator.java?rev=1561975&r1=1561974&r2=1561975&view=diff
==============================================================================
--- mahout/trunk/integration/src/test/java/org/apache/mahout/clustering/TestClusterEvaluator.java (original)
+++ mahout/trunk/integration/src/test/java/org/apache/mahout/clustering/TestClusterEvaluator.java Tue Jan 28 08:07:34 2014
@@ -284,7 +284,7 @@ public final class TestClusterEvaluator
CanopyDriver.run(conf, testdata, output, measure, 3.1, 1.1, false, 0.0, true);
// now run the KMeans job
Path kmeansOutput = new Path(output, "kmeans");
- KMeansDriver.run(testdata, new Path(output, "clusters-0-final"), kmeansOutput, measure, 0.001, 10, true, 0.0, true);
+ KMeansDriver.run(testdata, new Path(output, "clusters-0-final"), kmeansOutput, 0.001, 10, true, 0.0, true);
int numIterations = 10;
Path clustersIn = new Path(kmeansOutput, "clusters-2");
RepresentativePointsDriver.run(conf, clustersIn, new Path(kmeansOutput, "clusteredPoints"), kmeansOutput, measure,
@@ -305,7 +305,7 @@ public final class TestClusterEvaluator
CanopyDriver.run(conf, testdata, output, measure, 3.1, 1.1, false, 0.0, true);
Path fuzzyKMeansOutput = new Path(output, "fuzzyk");
// now run the KMeans job
- FuzzyKMeansDriver.run(testdata, new Path(output, "clusters-0-final"), fuzzyKMeansOutput, measure, 0.001, 10, 2,
+ FuzzyKMeansDriver.run(testdata, new Path(output, "clusters-0-final"), fuzzyKMeansOutput, 0.001, 10, 2,
true, true, 0, true);
int numIterations = 10;
Path clustersIn = new Path(fuzzyKMeansOutput, "clusters-4");
Modified: mahout/trunk/integration/src/test/java/org/apache/mahout/clustering/cdbw/TestCDbwEvaluator.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/test/java/org/apache/mahout/clustering/cdbw/TestCDbwEvaluator.java?rev=1561975&r1=1561974&r2=1561975&view=diff
==============================================================================
--- mahout/trunk/integration/src/test/java/org/apache/mahout/clustering/cdbw/TestCDbwEvaluator.java (original)
+++ mahout/trunk/integration/src/test/java/org/apache/mahout/clustering/cdbw/TestCDbwEvaluator.java Tue Jan 28 08:07:34 2014
@@ -124,7 +124,6 @@ public final class TestCDbwEvaluator ext
* double y-value of the sample mean
* @param sd
* double standard deviation of the samples
- * @throws Exception
*/
private void generateSamples(int num, double mx, double my, double sd) {
log.info("Generating {} samples m=[{}, {}] sd={}", num, mx, my, sd);
@@ -288,7 +287,7 @@ public final class TestCDbwEvaluator ext
CanopyDriver.run(getConfiguration(), testdata, output, measure, 3.1, 2.1, false, 0.0, true);
// now run the KMeans job
Path kmeansOutput = new Path(output, "kmeans");
- KMeansDriver.run(testdata, new Path(output, "clusters-0-final"), kmeansOutput, measure, 0.001, 10, true, 0.0, true);
+ KMeansDriver.run(testdata, new Path(output, "clusters-0-final"), kmeansOutput, 0.001, 10, true, 0.0, true);
int numIterations = 10;
Path clustersIn = new Path(kmeansOutput, "clusters-10-final");
RepresentativePointsDriver.run(conf, clustersIn, new Path(kmeansOutput, "clusteredPoints"), kmeansOutput, measure,
@@ -310,7 +309,7 @@ public final class TestCDbwEvaluator ext
CanopyDriver.run(getConfiguration(), testdata, output, measure, 3.1, 2.1, false, 0.0, true);
Path fuzzyKMeansOutput = new Path(output, "fuzzyk");
// now run the KMeans job
- FuzzyKMeansDriver.run(testdata, new Path(output, "clusters-0-final"), fuzzyKMeansOutput, measure, 0.001, 10, 2,
+ FuzzyKMeansDriver.run(testdata, new Path(output, "clusters-0-final"), fuzzyKMeansOutput, 0.001, 10, 2,
true, true, 0, true);
int numIterations = 10;
Path clustersIn = new Path(fuzzyKMeansOutput, "clusters-4");