You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by gs...@apache.org on 2009/06/15 03:32:14 UTC

svn commit: r784640 - in /lucene/mahout/trunk: core/src/main/java/org/apache/mahout/clustering/canopy/ core/src/main/java/org/apache/mahout/clustering/kmeans/ examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/ examples/src/main/java/...

Author: gsingers
Date: Mon Jun 15 01:32:13 2009
New Revision: 784640

URL: http://svn.apache.org/viewvc?rev=784640&view=rev
Log:
MAHOUT-132: make magic names constants

Added:
    lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/Constants.java
Modified:
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyClusteringJob.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/ClusterDriver.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansClusterMapper.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansDriver.java
    lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/canopy/Job.java
    lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/dirichlet/Job.java
    lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/kmeans/Job.java
    lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/kmeans/OutputMapper.java
    lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/Job.java

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyClusteringJob.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyClusteringJob.java?rev=784640&r1=784639&r2=784640&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyClusteringJob.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyClusteringJob.java Mon Jun 15 01:32:13 2009
@@ -21,6 +21,15 @@
 
 public class CanopyClusteringJob {
 
+  /**
+   * The default name of the canopies output sub-directory.
+   */     
+  public static final String DEFAULT_CANOPIES_OUTPUT_DIRECTORY = "/canopies";
+  /**
+   * The default name of the directory used to output clusters.
+   */
+  public static final String DEFAULT_CLUSTER_OUTPUT_DIRECTORY = ClusterDriver.DEFAULT_CLUSTER_OUTPUT_DIRECTORY;
+
   private CanopyClusteringJob() {
   }
 
@@ -47,8 +56,8 @@
    */
   public static void runJob(String input, String output,
                             String measureClassName, double t1, double t2) throws IOException {
-    CanopyDriver.runJob(input, output + "/canopies", measureClassName, t1, t2);
-    ClusterDriver.runJob(input, output + "/canopies", output, measureClassName, t1, t2);
+    CanopyDriver.runJob(input, output + DEFAULT_CANOPIES_OUTPUT_DIRECTORY, measureClassName, t1, t2);
+    ClusterDriver.runJob(input, output + DEFAULT_CANOPIES_OUTPUT_DIRECTORY, output, measureClassName, t1, t2);
   }
 
 }

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/ClusterDriver.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/ClusterDriver.java?rev=784640&r1=784639&r2=784640&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/ClusterDriver.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/ClusterDriver.java Mon Jun 15 01:32:13 2009
@@ -30,6 +30,8 @@
 
 public class ClusterDriver {
 
+  public static final String DEFAULT_CLUSTER_OUTPUT_DIRECTORY = "/clusters";
+
   private ClusterDriver() {
   }
 
@@ -68,7 +70,7 @@
     conf.setOutputValueClass(Text.class);
 
     FileInputFormat.setInputPaths(conf, new Path(points));
-    Path outPath = new Path(output + "/clusters");
+    Path outPath = new Path(output + DEFAULT_CLUSTER_OUTPUT_DIRECTORY);
     FileOutputFormat.setOutputPath(conf, outPath);
 
     conf.setMapperClass(ClusterMapper.class);

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansClusterMapper.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansClusterMapper.java?rev=784640&r1=784639&r2=784640&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansClusterMapper.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansClusterMapper.java Mon Jun 15 01:32:13 2009
@@ -29,9 +29,9 @@
   @Override
   public void map(WritableComparable<?> key, Text values,
       OutputCollector<Text, Text> output, Reporter reporter) throws IOException {
-    Vector point = AbstractVector.decodeVector(values.toString());
-    Cluster.outputPointWithClusterInfo(values.toString(), point, clusters,
-        values, output);
+    final String valuesAsString = values.toString();
+    final Vector point = AbstractVector.decodeVector(valuesAsString);
+    Cluster.outputPointWithClusterInfo(valuesAsString, point, clusters, values, output);
   }
 
 }

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansDriver.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansDriver.java?rev=784640&r1=784639&r2=784640&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansDriver.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansDriver.java Mon Jun 15 01:32:13 2009
@@ -32,6 +32,11 @@
 
 public class KMeansDriver {
 
+  /**
+   * The default name of the directory used to output final results.
+   */
+  public static final String DEFAULT_OUTPUT_DIRECTORY = "/points";
+  
   private static final Logger log = LoggerFactory.getLogger(KMeansDriver.class);
 
   private KMeansDriver() {
@@ -79,7 +84,7 @@
     }
     // now actually cluster the points
     log.info("Clustering ");
-    runClustering(input, clustersIn, output + "/points", measureClass, delta);
+    runClustering(input, clustersIn, output + DEFAULT_OUTPUT_DIRECTORY, measureClass, delta);
   }
 
   /**

Added: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/Constants.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/Constants.java?rev=784640&view=auto
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/Constants.java (added)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/Constants.java Mon Jun 15 01:32:13 2009
@@ -0,0 +1,33 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.mahout.clustering.syntheticcontrol;
+
+/**
+ * Constants shared between examples.
+ */
+public final class Constants {
+
+    /**
+     * Directory containing output for examples.
+     */
+    public static final String CLUSTERED_POINTS_OUTPUT_DIRECTORY = "/clustered-points";
+    /**
+     * Directory used to store the input after it has been processed from its
+     * original form into one suitable for processing by the clustering examples.
+     */
+    public static final String DIRECTORY_CONTAINING_CONVERTED_INPUT = "/data";
+}

Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/canopy/Job.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/canopy/Job.java?rev=784640&r1=784639&r2=784640&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/canopy/Job.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/canopy/Job.java Mon Jun 15 01:32:13 2009
@@ -25,6 +25,9 @@
 
 import java.io.IOException;
 
+import static org.apache.mahout.clustering.syntheticcontrol.Constants.CLUSTERED_POINTS_OUTPUT_DIRECTORY;
+import static org.apache.mahout.clustering.syntheticcontrol.Constants.DIRECTORY_CONTAINING_CONVERTED_INPUT;
+
 public class Job {
   private Job() {
   }
@@ -69,10 +72,11 @@
     FileSystem dfs = FileSystem.get(outPath.toUri(), conf);
     if (dfs.exists(outPath))
       dfs.delete(outPath, true);
-    InputDriver.runJob(input, output + "/data");
-    CanopyClusteringJob.runJob(output + "/data", output, measureClassName,
+    final String directoryContainingConvertedInput = output + DIRECTORY_CONTAINING_CONVERTED_INPUT;
+    InputDriver.runJob(input, directoryContainingConvertedInput);
+    CanopyClusteringJob.runJob(directoryContainingConvertedInput, output, measureClassName,
         t1, t2);
-    OutputDriver.runJob(output + "/clusters", output + "/clustered-points");
+    OutputDriver.runJob(output + CanopyClusteringJob.DEFAULT_CLUSTER_OUTPUT_DIRECTORY, output + CLUSTERED_POINTS_OUTPUT_DIRECTORY);
 
   }
 

Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/dirichlet/Job.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/dirichlet/Job.java?rev=784640&r1=784639&r2=784640&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/dirichlet/Job.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/dirichlet/Job.java Mon Jun 15 01:32:13 2009
@@ -33,7 +33,11 @@
 import org.apache.mahout.clustering.syntheticcontrol.canopy.InputDriver;
 import org.apache.mahout.matrix.Vector;
 
+import static org.apache.mahout.clustering.syntheticcontrol.Constants.DIRECTORY_CONTAINING_CONVERTED_INPUT;
+
 public class Job {
+       
+    
   private Job() {
   }
 
@@ -84,8 +88,9 @@
       fs.delete(outPath, true);
     }
     fs.mkdirs(outPath);
-    InputDriver.runJob(input, output + "/data");
-    DirichletDriver.runJob(output + "/data", output + "/state", modelFactory,
+    final String directoryContainingConvertedInput = output + DIRECTORY_CONTAINING_CONVERTED_INPUT;
+    InputDriver.runJob(input, directoryContainingConvertedInput);
+    DirichletDriver.runJob(directoryContainingConvertedInput, output + "/state", modelFactory,
         numModels, maxIterations, alpha_0, numReducers);
     printResults(output + "/state", modelFactory, maxIterations, numModels,
         alpha_0);

Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/kmeans/Job.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/kmeans/Job.java?rev=784640&r1=784639&r2=784640&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/kmeans/Job.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/kmeans/Job.java Mon Jun 15 01:32:13 2009
@@ -26,9 +26,13 @@
 import org.apache.mahout.clustering.canopy.CanopyClusteringJob;
 import org.apache.mahout.clustering.kmeans.KMeansDriver;
 import org.apache.mahout.clustering.syntheticcontrol.canopy.InputDriver;
+import static org.apache.mahout.clustering.syntheticcontrol.Constants.CLUSTERED_POINTS_OUTPUT_DIRECTORY;
+import static org.apache.mahout.clustering.syntheticcontrol.Constants.DIRECTORY_CONTAINING_CONVERTED_INPUT;
 
 public class Job {
-  private Job() {
+  
+
+private Job() {
   }
 
   public static void main(String[] args) throws IOException {
@@ -77,10 +81,12 @@
     FileSystem dfs = FileSystem.get(outPath.toUri(), conf);
     if (dfs.exists(outPath))
       dfs.delete(outPath, true);
-    InputDriver.runJob(input, output + "/data");
-    CanopyClusteringJob.runJob(output + "/data", output, measureClass, t1, t2);
-    KMeansDriver.runJob(output + "/data", output + "/canopies", output,
+    final String directoryContainingConvertedInput = output + DIRECTORY_CONTAINING_CONVERTED_INPUT;
+    InputDriver.runJob(input, directoryContainingConvertedInput);
+    CanopyClusteringJob.runJob(directoryContainingConvertedInput, output, measureClass, t1, t2);
+    KMeansDriver.runJob(directoryContainingConvertedInput, 
+            output + CanopyClusteringJob.DEFAULT_CANOPIES_OUTPUT_DIRECTORY, output,
         measureClass, convergenceDelta, maxIterations, 1);
-    //    OutputDriver.runJob(output + "/points", output + "/clustered-points");
+    //    OutputDriver.runJob(output + KMeansDriver.DEFAULT_OUTPUT_DIRECTORY, output + CLUSTERED_POINTS_OUTPUT_DIRECTORY);
   }
 }

Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/kmeans/OutputMapper.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/kmeans/OutputMapper.java?rev=784640&r1=784639&r2=784640&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/kmeans/OutputMapper.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/kmeans/OutputMapper.java Mon Jun 15 01:32:13 2009
@@ -35,12 +35,11 @@
   @Override
   public void map(LongWritable key, Text values,
       OutputCollector<Text, Text> output, Reporter reporter) throws IOException {
-    String foo = values.toString();
+    final String foo = values.toString();
     int ix = foo.indexOf(']');
     Cluster canopy = Cluster.decodeCluster(foo.substring(0, ix + 1));
     Vector point = AbstractVector.decodeVector(foo.substring(ix + 3));
-    output.collect(new Text(canopy.getIdentifier()), new Text(point
-        .asFormatString()));
+    output.collect(new Text(canopy.getIdentifier()), new Text(point.asFormatString()));
   }
 
 }

Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/Job.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/Job.java?rev=784640&r1=784639&r2=784640&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/Job.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/Job.java Mon Jun 15 01:32:13 2009
@@ -24,6 +24,9 @@
 import org.apache.hadoop.mapred.JobConf;
 import org.apache.mahout.clustering.meanshift.MeanShiftCanopyJob;
 
+import static org.apache.mahout.clustering.syntheticcontrol.Constants.CLUSTERED_POINTS_OUTPUT_DIRECTORY;
+import static org.apache.mahout.clustering.syntheticcontrol.Constants.DIRECTORY_CONTAINING_CONVERTED_INPUT;
+
 import java.io.IOException;
 
 public class Job {
@@ -74,14 +77,16 @@
     Path outPath = new Path(output);
     client.setConf(conf);
     FileSystem dfs = FileSystem.get(outPath.toUri(), conf);
-    if (dfs.exists(outPath))
+    if (dfs.exists(outPath)) {
       dfs.delete(outPath, true);
-    InputDriver.runJob(input, output + "/data");
-    MeanShiftCanopyJob.runJob(output + "/data", output + "/meanshift",
+    }
+    final String directoryContainingConvertedInput = output + DIRECTORY_CONTAINING_CONVERTED_INPUT;
+    InputDriver.runJob(input, directoryContainingConvertedInput);
+    MeanShiftCanopyJob.runJob(directoryContainingConvertedInput, output + "/meanshift",
         measureClassName, t1, t2, convergenceDelta, maxIterations);
     FileStatus[] status = dfs.listStatus(new Path(output + "/meanshift"));
     OutputDriver.runJob(status[status.length - 1].getPath().toString(),
-        output + "/clustered-points");
+        output + CLUSTERED_POINTS_OUTPUT_DIRECTORY);
   }
 
 }