You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by ro...@apache.org on 2010/02/13 20:08:05 UTC
svn commit: r909871 [3/7] - in /lucene/mahout/trunk/examples/src/main/java/org/apache/mahout: analysis/ cf/taste/ejb/ cf/taste/example/ cf/taste/example/bookcrossing/ cf/taste/example/grouplens/ cf/taste/example/jester/ cf/taste/example/netflix/ classi...

Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/kmeans/DisplayKMeans.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/kmeans/DisplayKMeans.java?rev=909871&r1=909870&r2=909871&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/kmeans/DisplayKMeans.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/kmeans/DisplayKMeans.java Sat Feb 13 19:07:36 2010
@@ -26,78 +26,88 @@
 
 import org.apache.mahout.clustering.canopy.Canopy;
 import org.apache.mahout.clustering.dirichlet.DisplayDirichlet;
-import org.apache.mahout.math.DenseVector;
-import org.apache.mahout.math.Vector;
+import org.apache.mahout.common.RandomUtils;
 import org.apache.mahout.common.distance.DistanceMeasure;
 import org.apache.mahout.common.distance.ManhattanDistanceMeasure;
-import org.apache.mahout.common.RandomUtils;
+import org.apache.mahout.math.DenseVector;
+import org.apache.mahout.math.Vector;
 import org.apache.mahout.math.VectorWritable;
 
 class DisplayKMeans extends DisplayDirichlet {
+  private static final double t1 = 3.0;
+  
+  private static final double t2 = 1.5;
+  
+  private static List<List<Cluster>> clusters;
+  
   DisplayKMeans() {
     initialize();
     this.setTitle("K-Means Clusters (> 5% of population)");
   }
-
-  private static List<List<Cluster>> clusters;
-
-  private static final double t1 = 3.0;
-
-  private static final double t2 = 1.5;
-
+  
   @Override
   public void paint(Graphics g) {
     super.plotSampleData(g);
     Graphics2D g2 = (Graphics2D) g;
     Vector dv = new DenseVector(2);
-    int i = clusters.size() - 1;
-    for (List<Cluster> cls : clusters) {
+    int i = DisplayKMeans.clusters.size() - 1;
+    for (List<Cluster> cls : DisplayKMeans.clusters) {
       g2.setStroke(new BasicStroke(i == 0 ? 3 : 1));
-      g2.setColor(colors[Math.min(colors.length - 1, i--)]);
+      g2.setColor(DisplayDirichlet.colors[Math.min(DisplayDirichlet.colors.length - 1, i--)]);
       for (Cluster cluster : cls) {
-        //if (true || cluster.getNumPoints() > sampleData.size() * 0.05) {
-          dv.assign(cluster.getStd() * 3);
-          plotEllipse(g2, cluster.getCenter(), dv);
-        //}
+        // if (true || cluster.getNumPoints() > sampleData.size() * 0.05) {
+        dv.assign(cluster.getStd() * 3);
+        DisplayDirichlet.plotEllipse(g2, cluster.getCenter(), dv);
+        // }
       }
     }
   }
-
+  
   /**
-   * This is the reference k-means implementation. Given its inputs it iterates
-   * over the points and clusters until their centers converge or until the
-   * maximum number of iterations is exceeded.
+   * This is the reference k-means implementation. Given its inputs it iterates over the points and clusters
+   * until their centers converge or until the maximum number of iterations is exceeded.
    * 
-   * @param points the input List<Vector> of points
-   * @param clusters the initial List<Cluster> of clusters
-   * @param measure the DistanceMeasure to use
-   * @param maxIter the maximum number of iterations
+   * @param points
+   *          the input List<Vector> of points
+   * @param clusters
+   *          the initial List<Cluster> of clusters
+   * @param measure
+   *          the DistanceMeasure to use
+   * @param maxIter
+   *          the maximum number of iterations
    */
   private static void referenceKmeans(List<VectorWritable> points,
-      List<List<Cluster>> clusters, DistanceMeasure measure, int maxIter) {
+                                      List<List<Cluster>> clusters,
+                                      DistanceMeasure measure,
+                                      int maxIter) {
     boolean converged = false;
     int iteration = 0;
     while (!converged && iteration < maxIter) {
       List<Cluster> next = new ArrayList<Cluster>();
       List<Cluster> cs = clusters.get(iteration++);
-      for (Cluster c : cs)
+      for (Cluster c : cs) {
         next.add(new Cluster(c.getCenter()));
+      }
       clusters.add(next);
-      converged = iterateReference(points, clusters.get(iteration), measure);
+      converged = DisplayKMeans.iterateReference(points, clusters.get(iteration), measure);
     }
   }
-
+  
   /**
-   * Perform a single iteration over the points and clusters, assigning points
-   * to clusters and returning if the iterations are completed.
+   * Perform a single iteration over the points and clusters, assigning points to clusters and returning whether
+   * the iterations are completed.
    * 
-   * @param points the List<Vector> having the input points
-   * @param clusters the List<Cluster> clusters
-   * @param measure a DistanceMeasure to use
+   * @param points
+   *          the List<Vector> having the input points
+   * @param clusters
+   *          the List<Cluster> clusters
+   * @param measure
+   *          a DistanceMeasure to use
    * @return
    */
   private static boolean iterateReference(List<VectorWritable> points,
-      List<Cluster> clusters, DistanceMeasure measure) {
+                                          List<Cluster> clusters,
+                                          DistanceMeasure measure) {
     // iterate through all points, assigning each to the nearest cluster
     for (VectorWritable point : points) {
       Cluster closestCluster = null;
@@ -114,41 +124,44 @@
     // test for convergence
     boolean converged = true;
     for (Cluster cluster : clusters) {
-      if (!cluster.computeConvergence(measure, 0.001))
+      if (!cluster.computeConvergence(measure, 0.001)) {
         converged = false;
+      }
     }
     // update the cluster centers
-    if (!converged)
-      for (Cluster cluster : clusters)
+    if (!converged) {
+      for (Cluster cluster : clusters) {
         cluster.recomputeCenter();
+      }
+    }
     return converged;
   }
-
+  
   /**
    * Iterate through the points, adding new canopies. Return the canopies.
    * 
    * @param measure
-   *            a DistanceMeasure to use
+   *          a DistanceMeasure to use
    * @param points
-   *            a list<Vector> defining the points to be clustered
+   *          a list<Vector> defining the points to be clustered
    * @param t1
-   *            the T1 distance threshold
+   *          the T1 distance threshold
    * @param t2
-   *            the T2 distance threshold
+   *          the T2 distance threshold
    * @return the List<Canopy> created
    */
   static List<Canopy> populateCanopies(DistanceMeasure measure,
-      List<VectorWritable> points, double t1, double t2) {
+                                       List<VectorWritable> points,
+                                       double t1,
+                                       double t2) {
     List<Canopy> canopies = new ArrayList<Canopy>();
     /**
-     * Reference Implementation: Given a distance metric, one can create
-     * canopies as follows: Start with a list of the data points in any order,
-     * and with two distance thresholds, T1 and T2, where T1 > T2. (These
-     * thresholds can be set by the user, or selected by cross-validation.) Pick
-     * a point on the list and measure its distance to all other points. Put all
-     * points that are within distance threshold T1 into a canopy. Remove from
-     * the list all points that are within distance threshold T2. Repeat until
-     * the list is empty.
+     * Reference Implementation: Given a distance metric, one can create canopies as follows: Start with a
+     * list of the data points in any order, and with two distance thresholds, T1 and T2, where T1 > T2.
+     * (These thresholds can be set by the user, or selected by cross-validation.) Pick a point on the list
+     * and measure its distance to all other points. Put all points that are within distance threshold T1 into
+     * a canopy. Remove from the list all points that are within distance threshold T2. Repeat until the list
+     * is empty.
      */
     int nextCanopyId = 0;
     while (!points.isEmpty()) {
@@ -161,29 +174,34 @@
         Vector p2 = ptIter.next().get();
         double dist = measure.distance(p1, p2);
         // Put all points that are within distance threshold T1 into the canopy
-        if (dist < t1)
+        if (dist < t1) {
           canopy.addPoint(p2);
+        }
         // Remove from the list all points that are within distance threshold T2
-        if (dist < t2)
+        if (dist < t2) {
           ptIter.remove();
+        }
       }
     }
     return canopies;
   }
-
+  
   public static void main(String[] args) {
     RandomUtils.useTestSeed();
-    generateSamples();
+    DisplayDirichlet.generateSamples();
     List<VectorWritable> points = new ArrayList<VectorWritable>();
-    points.addAll(sampleData);
-    List<Canopy> canopies = populateCanopies(new ManhattanDistanceMeasure(), points, t1, t2);
+    points.addAll(DisplayDirichlet.sampleData);
+    List<Canopy> canopies = DisplayKMeans.populateCanopies(new ManhattanDistanceMeasure(), points,
+      DisplayKMeans.t1, DisplayKMeans.t2);
     DistanceMeasure measure = new ManhattanDistanceMeasure();
-    clusters = new ArrayList<List<Cluster>>();
-    clusters.add(new ArrayList<Cluster>());
-    for (Canopy canopy : canopies)
-      if (canopy.getNumPoints() > 0.05 * sampleData.size())
-        clusters.get(0).add(new Cluster(canopy.getCenter()));
-    referenceKmeans(sampleData, clusters, measure, 10);
+    DisplayKMeans.clusters = new ArrayList<List<Cluster>>();
+    DisplayKMeans.clusters.add(new ArrayList<Cluster>());
+    for (Canopy canopy : canopies) {
+      if (canopy.getNumPoints() > 0.05 * DisplayDirichlet.sampleData.size()) {
+        DisplayKMeans.clusters.get(0).add(new Cluster(canopy.getCenter()));
+      }
+    }
+    DisplayKMeans.referenceKmeans(DisplayDirichlet.sampleData, DisplayKMeans.clusters, measure, 10);
     new DisplayKMeans();
   }
 }

Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/meanshift/DisplayMeanShift.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/meanshift/DisplayMeanShift.java?rev=909871&r1=909870&r2=909871&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/meanshift/DisplayMeanShift.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/meanshift/DisplayMeanShift.java Sat Feb 13 19:07:36 2010
@@ -26,85 +26,90 @@
 
 import org.apache.mahout.clustering.dirichlet.DisplayDirichlet;
 import org.apache.mahout.clustering.dirichlet.models.NormalModelDistribution;
+import org.apache.mahout.common.RandomUtils;
+import org.apache.mahout.common.distance.EuclideanDistanceMeasure;
 import org.apache.mahout.math.DenseVector;
 import org.apache.mahout.math.Vector;
-import org.apache.mahout.common.distance.EuclideanDistanceMeasure;
-import org.apache.mahout.common.RandomUtils;
 import org.apache.mahout.math.VectorWritable;
 
 class DisplayMeanShift extends DisplayDirichlet {
+  
+  private static final MeanShiftCanopyClusterer clusterer =
+    new MeanShiftCanopyClusterer(new EuclideanDistanceMeasure(), 1.0, 0.05, 0.5);
+  private static List<MeanShiftCanopy> canopies = new ArrayList<MeanShiftCanopy>();
+  
   private DisplayMeanShift() {
     initialize();
     this.setTitle("Canopy Clusters (> 1.5% of population)");
   }
-
-  private static final MeanShiftCanopyClusterer clusterer =
-      new MeanShiftCanopyClusterer(new EuclideanDistanceMeasure(), 1.0, 0.05, 0.5);
-  private static List<MeanShiftCanopy> canopies = new ArrayList<MeanShiftCanopy>();
-
   // TODO this is never queried?
   //private static final List<List<Vector>> iterationCenters = new ArrayList<List<Vector>>();
-
+  
   @Override
   public void paint(Graphics g) {
     Graphics2D g2 = (Graphics2D) g;
-    double sx = (double) res / ds;
+    double sx = (double) res / DisplayDirichlet.ds;
     g2.setTransform(AffineTransform.getScaleInstance(sx, sx));
-
+    
     // plot the axes
     g2.setColor(Color.BLACK);
-    Vector dv = new DenseVector(2).assign(size / 2.0);
-    Vector dv1 = new DenseVector(2).assign(clusterer.getT1());
-    Vector dv2 = new DenseVector(2).assign(clusterer.getT2());
-    plotRectangle(g2, new DenseVector(2).assign(2), dv);
-    plotRectangle(g2, new DenseVector(2).assign(-2), dv);
-
+    Vector dv = new DenseVector(2).assign(DisplayDirichlet.size / 2.0);
+    Vector dv1 = new DenseVector(2).assign(DisplayMeanShift.clusterer.getT1());
+    Vector dv2 = new DenseVector(2).assign(DisplayMeanShift.clusterer.getT2());
+    DisplayDirichlet.plotRectangle(g2, new DenseVector(2).assign(2), dv);
+    DisplayDirichlet.plotRectangle(g2, new DenseVector(2).assign(-2), dv);
+    
     // plot the sample data
     g2.setColor(Color.DARK_GRAY);
     dv.assign(0.03);
-    for (VectorWritable v : sampleData)
-      plotRectangle(g2, v.get(), dv);
+    for (VectorWritable v : DisplayDirichlet.sampleData) {
+      DisplayDirichlet.plotRectangle(g2, v.get(), dv);
+    }
     int i = 0;
-    for (MeanShiftCanopy canopy : canopies)
-      if (canopy.getBoundPoints().size() > 0.015 * sampleData.size()) {
-        g2.setColor(colors[Math.min(i++, colors.length - 1)]);
-        for (Vector v : canopy.getBoundPoints())
-          plotRectangle(g2, v, dv);
-        plotEllipse(g2, canopy.getCenter(), dv1);
-        plotEllipse(g2, canopy.getCenter(), dv2);
+    for (MeanShiftCanopy canopy : DisplayMeanShift.canopies) {
+      if (canopy.getBoundPoints().size() > 0.015 * DisplayDirichlet.sampleData.size()) {
+        g2.setColor(DisplayDirichlet.colors[Math.min(i++, DisplayDirichlet.colors.length - 1)]);
+        for (Vector v : canopy.getBoundPoints()) {
+          DisplayDirichlet.plotRectangle(g2, v, dv);
+        }
+        DisplayDirichlet.plotEllipse(g2, canopy.getCenter(), dv1);
+        DisplayDirichlet.plotEllipse(g2, canopy.getCenter(), dv2);
       }
+    }
   }
-
+  
   private static void testReferenceImplementation() {
     // add all points to the canopies
     int nextCanopyId = 0;
-    for (VectorWritable aRaw : sampleData) {
-      clusterer.mergeCanopy(new MeanShiftCanopy(aRaw.get(), nextCanopyId++), canopies);
+    for (VectorWritable aRaw : DisplayDirichlet.sampleData) {
+      DisplayMeanShift.clusterer.mergeCanopy(
+          new MeanShiftCanopy(aRaw.get(), nextCanopyId++), DisplayMeanShift.canopies);
     }
     boolean done = false;
-    while (!done) {// shift canopies to their centroids
+    while (!done) { // shift canopies to their centroids
       done = true;
       List<MeanShiftCanopy> migratedCanopies = new ArrayList<MeanShiftCanopy>();
       //List<Vector> centers = new ArrayList<Vector>();
-      for (MeanShiftCanopy canopy : canopies) {
+      for (MeanShiftCanopy canopy : DisplayMeanShift.canopies) {
         //centers.add(canopy.getCenter());
-        done = clusterer.shiftToMean(canopy) && done;
-        clusterer.mergeCanopy(canopy, migratedCanopies);
+        done = DisplayMeanShift.clusterer.shiftToMean(canopy) && done;
+        DisplayMeanShift.clusterer.mergeCanopy(canopy, migratedCanopies);
       }
       //iterationCenters.add(centers);
-      canopies = migratedCanopies;
+      DisplayMeanShift.canopies = migratedCanopies;
     }
   }
-
+  
   public static void main(String[] args) {
     RandomUtils.useTestSeed();
-    generateSamples();
-    testReferenceImplementation();
-    for (MeanShiftCanopy canopy : canopies)
+    DisplayDirichlet.generateSamples();
+    DisplayMeanShift.testReferenceImplementation();
+    for (MeanShiftCanopy canopy : DisplayMeanShift.canopies) {
       System.out.println(canopy.toString());
+    }
     new DisplayMeanShift();
   }
-
+  
   static void generateResults() {
     DisplayDirichlet.generateResults(new NormalModelDistribution());
   }

Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/Constants.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/Constants.java?rev=909871&r1=909870&r2=909871&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/Constants.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/Constants.java Sat Feb 13 19:07:36 2010
@@ -20,14 +20,14 @@
  * Constants shared between examples.
  */
 public interface Constants {
-
-    /**
-     * Directory containing output for examples.
-     */
-    String CLUSTERED_POINTS_OUTPUT_DIRECTORY = "/clustered-points";
-    /**
-     * Directory used to store the input after it has been processed from it's 
-     * original form into one suitable for processing by the clustering examples.
-     */
-    String DIRECTORY_CONTAINING_CONVERTED_INPUT = "/data";
+  
+  /**
+   * Directory containing output for examples.
+   */
+  String CLUSTERED_POINTS_OUTPUT_DIRECTORY = "/clustered-points";
+  /**
+   * Directory used to store the input after it has been processed from its
+   * original form into one suitable for processing by the clustering examples.
+   */
+  String DIRECTORY_CONTAINING_CONVERTED_INPUT = "/data";
 }

Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/canopy/InputDriver.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/canopy/InputDriver.java?rev=909871&r1=909870&r2=909871&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/canopy/InputDriver.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/canopy/InputDriver.java Sat Feb 13 19:07:36 2010
@@ -17,6 +17,8 @@
 
 package org.apache.mahout.clustering.syntheticcontrol.canopy;
 
+import java.io.IOException;
+
 import org.apache.commons.cli2.CommandLine;
 import org.apache.commons.cli2.Group;
 import org.apache.commons.cli2.Option;
@@ -39,29 +41,28 @@
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-import java.io.IOException;
-
 public class InputDriver {
   /**Logger for this class.*/
   private static final Logger LOG = LoggerFactory.getLogger(InputDriver.class);
-
+  
   private InputDriver() {
   }
-
+  
   public static void main(String[] args) throws IOException {
     DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
     ArgumentBuilder abuilder = new ArgumentBuilder();
     GroupBuilder gbuilder = new GroupBuilder();
-
+    
     Option inputOpt = DefaultOptionCreator.inputOption().withRequired(false).create();
     Option outputOpt = DefaultOptionCreator.outputOption().withRequired(false).create();
     Option vectorOpt = obuilder.withLongName("vector").withRequired(false).withArgument(
-        abuilder.withName("v").withMinimum(1).withMaximum(1).create()).withDescription(
-        "The vector implementation to use.").withShortName("v").create();
-
+      abuilder.withName("v").withMinimum(1).withMaximum(1).create()).withDescription(
+      "The vector implementation to use.").withShortName("v").create();
+    
     Option helpOpt = DefaultOptionCreator.helpOption();
-
-    Group group = gbuilder.withName("Options").withOption(inputOpt).withOption(outputOpt).withOption(vectorOpt).withOption(helpOpt).create();
+    
+    Group group = gbuilder.withName("Options").withOption(inputOpt).withOption(outputOpt).withOption(
+      vectorOpt).withOption(helpOpt).create();
 
     try {
       Parser parser = new Parser();
@@ -71,35 +72,35 @@
         CommandLineUtil.printHelp(group);
         return;
       }
-
+      
       String input = cmdLine.getValue(inputOpt, "testdata").toString();
       String output = cmdLine.getValue(outputOpt, "output").toString();
       String vectorClassName = cmdLine.getValue(vectorOpt, "org.apache.mahout.math.RandomAccessSparseVector").toString();
-      runJob(input, output, vectorClassName);
+      InputDriver.runJob(input, output, vectorClassName);
     } catch (OptionException e) {
-      LOG.error("Exception parsing command line: ", e);
+      InputDriver.LOG.error("Exception parsing command line: ", e);
       CommandLineUtil.printHelp(group);
     }
   }
-
+  
   public static void runJob(String input, String output, String vectorClassName) throws IOException {
     JobClient client = new JobClient();
     JobConf conf = new JobConf(InputDriver.class);
-
+    
     conf.setOutputKeyClass(Text.class);
     conf.setOutputValueClass(VectorWritable.class);
     conf.setOutputFormat(SequenceFileOutputFormat.class);
     conf.set("vector.implementation.class.name", vectorClassName);
     FileInputFormat.setInputPaths(conf, new Path(input));
     FileOutputFormat.setOutputPath(conf, new Path(output));
-
+    
     conf.setMapperClass(InputMapper.class);
-
+    
     conf.setReducerClass(Reducer.class);
     conf.setNumReduceTasks(0);
-
+    
     client.setConf(conf);
     JobClient.runJob(conf);
   }
-
+  
 }

Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/canopy/InputMapper.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/canopy/InputMapper.java?rev=909871&r1=909870&r2=909871&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/canopy/InputMapper.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/canopy/InputMapper.java Sat Feb 13 19:07:36 2010
@@ -17,41 +17,43 @@
 
 package org.apache.mahout.clustering.syntheticcontrol.canopy;
 
+import java.io.IOException;
+import java.lang.reflect.Constructor;
+import java.lang.reflect.InvocationTargetException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.regex.Pattern;
+
 import org.apache.hadoop.io.LongWritable;
 import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapred.JobConf;
 import org.apache.hadoop.mapred.MapReduceBase;
 import org.apache.hadoop.mapred.Mapper;
 import org.apache.hadoop.mapred.OutputCollector;
 import org.apache.hadoop.mapred.Reporter;
-import org.apache.hadoop.mapred.JobConf;
 import org.apache.mahout.math.Vector;
 import org.apache.mahout.math.VectorWritable;
 
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.regex.Pattern;
-import java.lang.reflect.Constructor;
-import java.lang.reflect.InvocationTargetException;
-
-public class InputMapper extends MapReduceBase implements
-    Mapper<LongWritable, Text, Text, VectorWritable> {
-
+public class InputMapper extends MapReduceBase implements Mapper<LongWritable,Text,Text,VectorWritable> {
+  
   private static final Pattern SPACE = Pattern.compile(" ");
-
+  
   private Constructor<?> constructor;
-
+  
   private VectorWritable vectorWritable;
-
+  
   @Override
-  public void map(LongWritable key, Text values,
-      OutputCollector<Text, VectorWritable> output, Reporter reporter) throws IOException {
-    String[] numbers = SPACE.split(values.toString());
+  public void map(LongWritable key,
+                  Text values,
+                  OutputCollector<Text,VectorWritable> output,
+                  Reporter reporter) throws IOException {
+    String[] numbers = InputMapper.SPACE.split(values.toString());
     // sometimes there are multiple separator spaces
     List<Double> doubles = new ArrayList<Double>();
     for (String value : numbers) {
-      if (value.length() > 0)
+      if (value.length() > 0) {
         doubles.add(Double.valueOf(value));
+      }
     }
     try {
       Vector result = (Vector) constructor.newInstance(doubles.size());
@@ -61,7 +63,7 @@
       }
       vectorWritable.set(result);
       output.collect(new Text(String.valueOf(index)), vectorWritable);
-
+      
     } catch (InstantiationException e) {
       throw new IllegalStateException(e);
     } catch (IllegalAccessException e) {
@@ -70,8 +72,7 @@
       throw new IllegalStateException(e);
     }
   }
-
-
+  
   @Override
   public void configure(JobConf job) {
     vectorWritable = new VectorWritable();
@@ -84,6 +85,6 @@
     } catch (ClassNotFoundException e) {
       throw new IllegalStateException(e);
     }
-
+    
   }
 }

Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/canopy/Job.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/canopy/Job.java?rev=909871&r1=909870&r2=909871&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/canopy/Job.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/canopy/Job.java Sat Feb 13 19:07:36 2010
@@ -34,90 +34,83 @@
 import org.apache.mahout.clustering.canopy.CanopyClusteringJob;
 import org.apache.mahout.clustering.syntheticcontrol.Constants;
 import org.apache.mahout.common.CommandLineUtil;
-import org.apache.mahout.math.Vector;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 public class Job {
-  /** Logger for this class.*/
+  /** Logger for this class. */
   private static final Logger LOG = LoggerFactory.getLogger(Job.class);
-
-  private Job() {
-  }
-
+  
+  private Job() { }
+  
   public static void main(String[] args) throws IOException, ClassNotFoundException {
-      DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
-      ArgumentBuilder abuilder = new ArgumentBuilder();
-      GroupBuilder gbuilder = new GroupBuilder();
-
-      Option inputOpt = obuilder.withLongName("input").withRequired(false).withArgument(
-          abuilder.withName("input").withMinimum(1).withMaximum(1).create()).
-          withDescription("The Path for input Vectors. Must be a SequenceFile of Writable, Vector").withShortName("i").create();
-      Option outputOpt = obuilder.withLongName("output").withRequired(false).withArgument(
-          abuilder.withName("output").withMinimum(1).withMaximum(1).create()).
-          withDescription("The Path to put the output in").withShortName("o").create();
-
-      Option measureClassOpt = obuilder.withLongName("distance").withRequired(false).withArgument(
-          abuilder.withName("distance").withMinimum(1).withMaximum(1).create()).
-          withDescription("The Distance Measure to use.  Default is SquaredEuclidean").withShortName("m").create();
-      Option vectorClassOpt = obuilder.withLongName("vectorClass").withRequired(false).withArgument(
-          abuilder.withName("vectorClass").withMinimum(1).withMaximum(1).create()).
-          withDescription("The Vector implementation class name.  Default is RandomAccessSparseVector.class").withShortName("v").create();
-
-      Option t1Opt = obuilder.withLongName("t1").withRequired(false).withArgument(
-          abuilder.withName("t1").withMinimum(1).withMaximum(1).create()).
-          withDescription("t1").withShortName("t1").create();
-      Option t2Opt = obuilder.withLongName("t2").withRequired(false).withArgument(
-          abuilder.withName("t2").withMinimum(1).withMaximum(1).create()).
-          withDescription("t2").withShortName("t2").create();
-
-
-      Option helpOpt = obuilder.withLongName("help").
-          withDescription("Print out help").withShortName("h").create();
-
-      Group group = gbuilder.withName("Options").withOption(inputOpt).withOption(outputOpt)
-          .withOption(measureClassOpt).withOption(vectorClassOpt)
-          .withOption(t1Opt).withOption(t2Opt)
-          .withOption(helpOpt).create();
-
-
-      try {
-        Parser parser = new Parser();
-        parser.setGroup(group);
-        CommandLine cmdLine = parser.parse(args);
-
-        if (cmdLine.hasOption(helpOpt)) {
-          CommandLineUtil.printHelp(group);
-          return;
-        }
-
-        String input = cmdLine.getValue(inputOpt, "testdata").toString();
-        String output = cmdLine.getValue(outputOpt, "output").toString();
-        String measureClass = cmdLine.getValue(
-            measureClassOpt, "org.apache.mahout.common.distance.EuclideanDistanceMeasure").toString();
-
-        String className =  cmdLine.getValue(vectorClassOpt, "org.apache.mahout.math.RandomAccessSparseVector").toString();
-        //Class<? extends Vector> vectorClass = Class.forName(className).asSubclass(Vector.class);
-        double t1 = Double.parseDouble(cmdLine.getValue(t1Opt, "80").toString());
-        double t2 = Double.parseDouble(cmdLine.getValue(t2Opt, "55").toString());
-
-        runJob(input, output, measureClass, t1, t2);
-      } catch (OptionException e) {
-        LOG.error("Exception", e);
+    DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
+    ArgumentBuilder abuilder = new ArgumentBuilder();
+    GroupBuilder gbuilder = new GroupBuilder();
+    
+    Option inputOpt = obuilder.withLongName("input").withRequired(false).withArgument(
+      abuilder.withName("input").withMinimum(1).withMaximum(1).create()).withDescription(
+      "The Path for input Vectors. Must be a SequenceFile of Writable, Vector").withShortName("i").create();
+    Option outputOpt = obuilder.withLongName("output").withRequired(false).withArgument(
+      abuilder.withName("output").withMinimum(1).withMaximum(1).create()).withDescription(
+      "The Path to put the output in").withShortName("o").create();
+    
+    Option measureClassOpt = obuilder.withLongName("distance").withRequired(false).withArgument(
+      abuilder.withName("distance").withMinimum(1).withMaximum(1).create()).withDescription(
+      "The Distance Measure to use.  Default is SquaredEuclidean").withShortName("m").create();
+    // Option vectorClassOpt = obuilder.withLongName("vectorClass").withRequired(false).withArgument(
+    // abuilder.withName("vectorClass").withMinimum(1).withMaximum(1).create()).
+    // withDescription("The Vector implementation class name.  Default is RandomAccessSparseVector.class").withShortName("v").create();
+    
+    Option t1Opt = obuilder.withLongName("t1").withRequired(false).withArgument(
+      abuilder.withName("t1").withMinimum(1).withMaximum(1).create()).withDescription("t1").withShortName(
+      "t1").create();
+    Option t2Opt = obuilder.withLongName("t2").withRequired(false).withArgument(
+      abuilder.withName("t2").withMinimum(1).withMaximum(1).create()).withDescription("t2").withShortName(
+      "t2").create();
+    
+    Option helpOpt = obuilder.withLongName("help").withDescription("Print out help").withShortName("h")
+        .create();
+    
+    Group group = gbuilder.withName("Options").withOption(inputOpt).withOption(outputOpt).withOption(
+      measureClassOpt)// .withOption(vectorClassOpt)
+        .withOption(t1Opt).withOption(t2Opt).withOption(helpOpt).create();
+    
+    try {
+      Parser parser = new Parser();
+      parser.setGroup(group);
+      CommandLine cmdLine = parser.parse(args);
+      
+      if (cmdLine.hasOption(helpOpt)) {
         CommandLineUtil.printHelp(group);
+        return;
       }
+      
+      String input = cmdLine.getValue(inputOpt, "testdata").toString();
+      String output = cmdLine.getValue(outputOpt, "output").toString();
+      String measureClass = cmdLine.getValue(measureClassOpt,
+        "org.apache.mahout.common.distance.EuclideanDistanceMeasure").toString();
+      
+      // String className = cmdLine.getValue(vectorClassOpt,
+      // "org.apache.mahout.math.RandomAccessSparseVector").toString();
+      // Class<? extends Vector> vectorClass = Class.forName(className).asSubclass(Vector.class);
+      double t1 = Double.parseDouble(cmdLine.getValue(t1Opt, "80").toString());
+      double t2 = Double.parseDouble(cmdLine.getValue(t2Opt, "55").toString());
+      
+      Job.runJob(input, output, measureClass, t1, t2);
+    } catch (OptionException e) {
+      Job.LOG.error("Exception", e);
+      CommandLineUtil.printHelp(group);
+    }
   }
-
+  
   /**
-   * Run the canopy clustering job on an input dataset using the given distance
-   * measure, t1 and t2 parameters. All output data will be written to the
-   * output directory, which will be initially deleted if it exists. The
-   * clustered points will reside in the path <output>/clustered-points. By
-   * default, the job expects the a file containing synthetic_control.data as
-   * obtained from
-   * http://archive.ics.uci.edu/ml/datasets/Synthetic+Control+Chart+Time+Series
-   * resides in a directory named "testdata", and writes output to a directory
-   * named "output".
+   * Run the canopy clustering job on an input dataset using the given distance measure, t1 and t2 parameters.
+   * All output data will be written to the output directory, which will be initially deleted if it exists.
+   * The clustered points will reside in the path <output>/clustered-points. By default, the job expects that a
+   * file containing synthetic_control.data as obtained from
+   * http://archive.ics.uci.edu/ml/datasets/Synthetic+Control+Chart+Time+Series resides in a directory named
+   * "testdata", and writes output to a directory named "output".
    * 
    * @param input
    *          the String denoting the input directory path
@@ -130,21 +123,20 @@
    * @param t2
    *          the canopy T2 threshold
    */
-  private static void runJob(String input, String output,
-      String measureClassName, double t1, double t2) throws IOException {
+  private static void runJob(String input, String output, String measureClassName, double t1, double t2) throws IOException {
     JobClient client = new JobClient();
     JobConf conf = new JobConf(Job.class);
-
+    
     Path outPath = new Path(output);
     client.setConf(conf);
     FileSystem dfs = FileSystem.get(outPath.toUri(), conf);
-    if (dfs.exists(outPath))
+    if (dfs.exists(outPath)) {
       dfs.delete(outPath, true);
-    String directoryContainingConvertedInput = output
-        + Constants.DIRECTORY_CONTAINING_CONVERTED_INPUT;
-    InputDriver.runJob(input, directoryContainingConvertedInput, "org.apache.mahout.math.RandomAccessSparseVector");
-    CanopyClusteringJob.runJob(directoryContainingConvertedInput, output,
-        measureClassName, t1, t2);
+    }
+    String directoryContainingConvertedInput = output + Constants.DIRECTORY_CONTAINING_CONVERTED_INPUT;
+    InputDriver.runJob(input, directoryContainingConvertedInput,
+      "org.apache.mahout.math.RandomAccessSparseVector");
+    CanopyClusteringJob.runJob(directoryContainingConvertedInput, output, measureClassName, t1, t2);
   }
-
+  
 }

Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/dirichlet/Job.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/dirichlet/Job.java?rev=909871&r1=909870&r2=909871&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/dirichlet/Job.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/dirichlet/Job.java Sat Feb 13 19:07:36 2010
@@ -48,43 +48,43 @@
 import org.slf4j.LoggerFactory;
 
 public class Job {
-
-  /**Logger for this class.*/
+  
+  /** Logger for this class. */
   private static final Logger log = LoggerFactory.getLogger(Job.class);
-
-  private Job() {
-  }
-
+  
+  private Job() { }
+  
   public static void main(String[] args) throws Exception {
     DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
     ArgumentBuilder abuilder = new ArgumentBuilder();
     GroupBuilder gbuilder = new GroupBuilder();
-
+    
     Option inputOpt = DefaultOptionCreator.inputOption().withRequired(false).create();
     Option outputOpt = DefaultOptionCreator.outputOption().withRequired(false).create();
     Option maxIterOpt = DefaultOptionCreator.maxIterOption().withRequired(false).create();
     Option topicsOpt = DefaultOptionCreator.kOption().withRequired(false).create();
-
+    
     Option redOpt = obuilder.withLongName("reducerNum").withRequired(false).withArgument(
-        abuilder.withName("r").withMinimum(1).withMaximum(1).create()).withDescription("The number of reducers to use.")
-        .withShortName("r").create();
-
+      abuilder.withName("r").withMinimum(1).withMaximum(1).create()).withDescription(
+      "The number of reducers to use.").withShortName("r").create();
+    
     Option vectorOpt = obuilder.withLongName("vector").withRequired(false).withArgument(
-        abuilder.withName("v").withMinimum(1).withMaximum(1).create()).withDescription("The vector implementation to use.")
-        .withShortName("v").create();
-
+      abuilder.withName("v").withMinimum(1).withMaximum(1).create()).withDescription(
+      "The vector implementation to use.").withShortName("v").create();
+    
     Option mOpt = obuilder.withLongName("alpha").withRequired(false).withShortName("m").withArgument(
-        abuilder.withName("alpha").withMinimum(1).withMaximum(1).create()).withDescription(
-        "The alpha0 value for the DirichletDistribution.").create();
-
-    Option modelOpt = obuilder.withLongName("modelClass").withRequired(false).withShortName("d").withArgument(
-        abuilder.withName("modelClass").withMinimum(1).withMaximum(1).create())
+      abuilder.withName("alpha").withMinimum(1).withMaximum(1).create()).withDescription(
+      "The alpha0 value for the DirichletDistribution.").create();
+    
+    Option modelOpt = obuilder.withLongName("modelClass").withRequired(false).withShortName("d")
+        .withArgument(abuilder.withName("modelClass").withMinimum(1).withMaximum(1).create())
         .withDescription("The ModelDistribution class name.").create();
     Option helpOpt = DefaultOptionCreator.helpOption();
-
-    Group group = gbuilder.withName("Options").withOption(inputOpt).withOption(outputOpt).withOption(modelOpt).withOption(
-        maxIterOpt).withOption(mOpt).withOption(topicsOpt).withOption(redOpt).withOption(helpOpt).create();
-
+    
+    Group group = gbuilder.withName("Options").withOption(inputOpt).withOption(outputOpt)
+        .withOption(modelOpt).withOption(maxIterOpt).withOption(mOpt).withOption(topicsOpt)
+        .withOption(redOpt).withOption(helpOpt).create();
+    
     try {
       Parser parser = new Parser();
       parser.setGroup(group);
@@ -93,41 +93,50 @@
         CommandLineUtil.printHelp(group);
         return;
       }
-
+      
       String input = cmdLine.getValue(inputOpt, "testdata").toString();
       String output = cmdLine.getValue(outputOpt, "output").toString();
       String modelFactory = cmdLine.getValue(modelOpt,
-          "org.apache.mahout.clustering.syntheticcontrol.dirichlet.NormalScModelDistribution").toString();
+        "org.apache.mahout.clustering.syntheticcontrol.dirichlet.NormalScModelDistribution").toString();
       int numModels = Integer.parseInt(cmdLine.getValue(topicsOpt, "10").toString());
       int maxIterations = Integer.parseInt(cmdLine.getValue(maxIterOpt, "5").toString());
       double alpha_0 = Double.parseDouble(cmdLine.getValue(mOpt, "1.0").toString());
       int numReducers = Integer.parseInt(cmdLine.getValue(redOpt, "1").toString());
-      String vectorClassName = cmdLine.getValue(vectorOpt, "org.apache.mahout.math.RandomAccessSparseVector").toString();
-      runJob(input, output, modelFactory, numModels, maxIterations, alpha_0, numReducers, vectorClassName);
+      String vectorClassName = cmdLine.getValue(vectorOpt, "org.apache.mahout.math.RandomAccessSparseVector")
+          .toString();
+      Job
+          .runJob(input, output, modelFactory, numModels, maxIterations, alpha_0, numReducers,
+            vectorClassName);
     } catch (OptionException e) {
-      log.error("Exception parsing command line: ", e);
+      Job.log.error("Exception parsing command line: ", e);
       CommandLineUtil.printHelp(group);
     }
   }
-
+  
   /**
-   * Run the job using supplied arguments, deleting the output directory if it
-   * exists beforehand
+   * Run the job using supplied arguments, deleting the output directory if it exists beforehand
    * 
-   * @param input the directory pathname for input points
-   * @param output the directory pathname for output points
-   * @param modelFactory the ModelDistribution class name
-   * @param numModels the number of Models
-   * @param maxIterations the maximum number of iterations
-   * @param alpha_0 the alpha0 value for the DirichletDistribution
-   * @param numReducers the desired number of reducers
-   * @throws IllegalAccessException 
-   * @throws InstantiationException 
-   * @throws ClassNotFoundException 
-   * @throws InvocationTargetException 
-   * @throws NoSuchMethodException 
-   * @throws IllegalArgumentException 
-   * @throws SecurityException 
+   * @param input
+   *          the directory pathname for input points
+   * @param output
+   *          the directory pathname for output points
+   * @param modelFactory
+   *          the ModelDistribution class name
+   * @param numModels
+   *          the number of Models
+   * @param maxIterations
+   *          the maximum number of iterations
+   * @param alpha_0
+   *          the alpha0 value for the DirichletDistribution
+   * @param numReducers
+   *          the desired number of reducers
+   * @throws IllegalAccessException
+   * @throws InstantiationException
+   * @throws ClassNotFoundException
+   * @throws InvocationTargetException
+   * @throws NoSuchMethodException
+   * @throws IllegalArgumentException
+   * @throws SecurityException
    */
   public static void runJob(String input,
                             String output,
@@ -136,9 +145,14 @@
                             int maxIterations,
                             double alpha_0,
                             int numReducers,
-                            String vectorClassName)
-      throws IOException, ClassNotFoundException, InstantiationException, IllegalAccessException,
-      SecurityException, IllegalArgumentException, NoSuchMethodException, InvocationTargetException {
+                            String vectorClassName) throws IOException,
+                                                   ClassNotFoundException,
+                                                   InstantiationException,
+                                                   IllegalAccessException,
+                                                   SecurityException,
+                                                   IllegalArgumentException,
+                                                   NoSuchMethodException,
+                                                   InvocationTargetException {
     // delete the output directory
     JobConf conf = new JobConf(DirichletJob.class);
     Path outPath = new Path(output);
@@ -149,33 +163,41 @@
     fs.mkdirs(outPath);
     String directoryContainingConvertedInput = output + Constants.DIRECTORY_CONTAINING_CONVERTED_INPUT;
     InputDriver.runJob(input, directoryContainingConvertedInput, vectorClassName);
-    DirichletDriver.runJob(directoryContainingConvertedInput,
-                           output + "/state",
-                           modelFactory,
-                           vectorClassName,
-                           60,
-                           numModels,
-                           maxIterations,
-                           alpha_0,
-                           numReducers);
-    printResults(output + "/state", modelFactory, vectorClassName, 60, maxIterations, numModels, alpha_0);
+    DirichletDriver.runJob(directoryContainingConvertedInput, output + "/state", modelFactory,
+      vectorClassName, 60, numModels, maxIterations, alpha_0, numReducers);
+    Job.printResults(output + "/state", modelFactory, vectorClassName, 60, maxIterations, numModels, alpha_0);
   }
-
+  
   /**
    * Prints out all of the clusters during each iteration
-   * @param output the String output directory
-   * @param modelDistribution the String class name of the ModelDistribution
-   * @param vectorClassName the String class name of the Vector to use
-   * @param prototypeSize the size of the Vector prototype for the Dirichlet Models
-   * @param numIterations the int number of Iterations
-   * @param numModels the int number of models
-   * @param alpha_0 the double alpha_0 value
-   * @throws InvocationTargetException 
-   * @throws NoSuchMethodException 
+   * 
+   * @param output
+   *          the String output directory
+   * @param modelDistribution
+   *          the String class name of the ModelDistribution
+   * @param vectorClassName
+   *          the String class name of the Vector to use
+   * @param prototypeSize
+   *          the size of the Vector prototype for the Dirichlet Models
+   * @param numIterations
+   *          the int number of Iterations
+   * @param numModels
+   *          the int number of models
+   * @param alpha_0
+   *          the double alpha_0 value
+   * @throws InvocationTargetException
+   * @throws NoSuchMethodException
    * @throws SecurityException
    */
-  public static void printResults(String output, String modelDistribution, String vectorClassName, int prototypeSize,
-      int numIterations, int numModels, double alpha_0) throws SecurityException, NoSuchMethodException, InvocationTargetException {
+  public static void printResults(String output,
+                                  String modelDistribution,
+                                  String vectorClassName,
+                                  int prototypeSize,
+                                  int numIterations,
+                                  int numModels,
+                                  double alpha_0) throws SecurityException,
+                                                 NoSuchMethodException,
+                                                 InvocationTargetException {
     List<List<DirichletCluster<VectorWritable>>> clusters = new ArrayList<List<DirichletCluster<VectorWritable>>>();
     JobConf conf = new JobConf(KMeansDriver.class);
     conf.set(DirichletDriver.MODEL_FACTORY_KEY, modelDistribution);
@@ -187,14 +209,17 @@
       conf.set(DirichletDriver.PROTOTYPE_SIZE_KEY, Integer.toString(prototypeSize));
       clusters.add(DirichletMapper.getDirichletState(conf).getClusters());
     }
-    printResults(clusters, 0);
-
+    Job.printResults(clusters, 0);
+    
   }
-
+  
   /**
    * Actually prints out the clusters
-   * @param clusters a List of Lists of DirichletClusters
-   * @param significant the minimum number of samples to enable printing a model
+   * 
+   * @param clusters
+   *          a List of Lists of DirichletClusters
+   * @param significant
+   *          the minimum number of samples to enable printing a model
    */
   private static void printResults(List<List<DirichletCluster<VectorWritable>>> clusters, int significant) {
     int row = 0;
@@ -211,6 +236,6 @@
       result.append('\n');
     }
     result.append('\n');
-    log.info(result.toString());
+    Job.log.info(result.toString());
   }
 }

Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/dirichlet/NormalScModelDistribution.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/dirichlet/NormalScModelDistribution.java?rev=909871&r1=909870&r2=909871&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/dirichlet/NormalScModelDistribution.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/dirichlet/NormalScModelDistribution.java Sat Feb 13 19:07:36 2010
@@ -29,14 +29,15 @@
  * DirichletCluster algorithm. Uses a Normal Distribution
  */
 public class NormalScModelDistribution extends NormalModelDistribution {
-
+  
   @Override
   public Model<VectorWritable>[] sampleFromPrior(int howMany) {
     Model<VectorWritable>[] result = new NormalModel[howMany];
     for (int i = 0; i < howMany; i++) {
       DenseVector mean = new DenseVector(60);
-      for (int j = 0; j < 60; j++)
+      for (int j = 0; j < 60; j++) {
         mean.set(j, UncommonDistributions.rNorm(30, 0.5));
+      }
       result[i] = new NormalModel(mean, 1);
     }
     return result;

Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/kmeans/Job.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/kmeans/Job.java?rev=909871&r1=909870&r2=909871&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/kmeans/Job.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/kmeans/Job.java Sat Feb 13 19:07:36 2010
@@ -17,8 +17,6 @@
 
 package org.apache.mahout.clustering.syntheticcontrol.kmeans;
 
-import static org.apache.mahout.clustering.syntheticcontrol.Constants.DIRECTORY_CONTAINING_CONVERTED_INPUT;
-
 import java.io.IOException;
 
 import org.apache.commons.cli2.CommandLine;
@@ -36,117 +34,127 @@
 import org.apache.mahout.clustering.canopy.CanopyClusteringJob;
 import org.apache.mahout.clustering.canopy.CanopyDriver;
 import org.apache.mahout.clustering.kmeans.KMeansDriver;
+import org.apache.mahout.clustering.syntheticcontrol.Constants;
 import org.apache.mahout.clustering.syntheticcontrol.canopy.InputDriver;
 import org.apache.mahout.common.CommandLineUtil;
 import org.apache.mahout.common.commandline.DefaultOptionCreator;
-import org.apache.mahout.math.Vector;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 public class Job {
-
-  /** Logger for this class.*/
+  
+  /** Logger for this class. */
   private static final Logger LOG = LoggerFactory.getLogger(Job.class);
-
-  private Job() {
-  }
-
-  public static void main(String[] args) throws IOException,
-      ClassNotFoundException {
+  
+  private Job() { }
+  
+  public static void main(String[] args) throws IOException, ClassNotFoundException {
     DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
     ArgumentBuilder abuilder = new ArgumentBuilder();
     GroupBuilder gbuilder = new GroupBuilder();
-
+    
     Option inputOpt = DefaultOptionCreator.inputOption().withRequired(false).create();
     Option outputOpt = DefaultOptionCreator.outputOption().withRequired(false).create();
     Option convergenceDeltaOpt = DefaultOptionCreator.convergenceOption().withRequired(false).create();
     Option maxIterationsOpt = DefaultOptionCreator.maxIterOption().withRequired(false).create();
-
+    
     Option measureClassOpt = obuilder.withLongName("distance").withRequired(false).withArgument(
-        abuilder.withName("distance").withMinimum(1).withMaximum(1).create()).withDescription(
-        "The Distance Measure to use.  Default is SquaredEuclidean").withShortName("m").create();
-
+      abuilder.withName("distance").withMinimum(1).withMaximum(1).create()).withDescription(
+      "The Distance Measure to use.  Default is SquaredEuclidean").withShortName("m").create();
+    
     Option t1Opt = obuilder.withLongName("t1").withRequired(false).withArgument(
-        abuilder.withName("t1").withMinimum(1).withMaximum(1).create()).withDescription(
-        "The t1 value to use.").withShortName("m").create();
+      abuilder.withName("t1").withMinimum(1).withMaximum(1).create()).withDescription("The t1 value to use.")
+        .withShortName("m").create();
     Option t2Opt = obuilder.withLongName("t2").withRequired(false).withArgument(
-        abuilder.withName("t2").withMinimum(1).withMaximum(1).create()).withDescription(
-        "The t2 value to use.").withShortName("m").create();
+      abuilder.withName("t2").withMinimum(1).withMaximum(1).create()).withDescription("The t2 value to use.")
+        .withShortName("m").create();
     Option vectorClassOpt = obuilder.withLongName("vectorClass").withRequired(false).withArgument(
-        abuilder.withName("vectorClass").withMinimum(1).withMaximum(1).create()).withDescription(
-        "The Vector implementation class name.  Default is RandomAccessSparseVector.class").withShortName("v").create();
-
+      abuilder.withName("vectorClass").withMinimum(1).withMaximum(1).create()).withDescription(
+      "The Vector implementation class name.  Default is RandomAccessSparseVector.class").withShortName("v")
+        .create();
+    
     Option helpOpt = DefaultOptionCreator.helpOption();
-
-    Group group = gbuilder.withName("Options").withOption(inputOpt).withOption(outputOpt)
-        .withOption(measureClassOpt).withOption(convergenceDeltaOpt).withOption(maxIterationsOpt)
+    
+    Group group = gbuilder.withName("Options").withOption(inputOpt).withOption(outputOpt).withOption(
+      measureClassOpt).withOption(convergenceDeltaOpt).withOption(maxIterationsOpt)
         .withOption(vectorClassOpt).withOption(t1Opt).withOption(t2Opt).withOption(helpOpt).create();
     try {
       Parser parser = new Parser();
       parser.setGroup(group);
       CommandLine cmdLine = parser.parse(args);
-
+      
       if (cmdLine.hasOption(helpOpt)) {
         CommandLineUtil.printHelp(group);
         return;
       }
       String input = cmdLine.getValue(inputOpt, "testdata").toString();
       String output = cmdLine.getValue(outputOpt, "output").toString();
-      String measureClass = cmdLine.getValue(measureClassOpt, "org.apache.mahout.common.distance.EuclideanDistanceMeasure").toString();
+      String measureClass = cmdLine.getValue(measureClassOpt,
+        "org.apache.mahout.common.distance.EuclideanDistanceMeasure").toString();
       double t1 = Double.parseDouble(cmdLine.getValue(t1Opt, "80").toString());
       double t2 = Double.parseDouble(cmdLine.getValue(t2Opt, "55").toString());
       double convergenceDelta = Double.parseDouble(cmdLine.getValue(convergenceDeltaOpt, "0.5").toString());
       int maxIterations = Integer.parseInt(cmdLine.getValue(maxIterationsOpt, 10).toString());
-      String className = cmdLine.getValue(vectorClassOpt, "org.apache.mahout.math.RandomAccessSparseVector").toString();
-      Class<? extends Vector> vectorClass = Class.forName(className).asSubclass(Vector.class);
-
-      runJob(input, output, measureClass, t1, t2, convergenceDelta, maxIterations);
+      // String className = cmdLine.getValue(vectorClassOpt,
+      // "org.apache.mahout.math.RandomAccessSparseVector").toString();
+      // Class<? extends Vector> vectorClass = Class.forName(className).asSubclass(Vector.class);
+      
+      Job.runJob(input, output, measureClass, t1, t2, convergenceDelta, maxIterations);
     } catch (OptionException e) {
-      LOG.error("Exception", e);
+      Job.LOG.error("Exception", e);
       CommandLineUtil.printHelp(group);
     }
   }
-
+  
   /**
-   * Run the kmeans clustering job on an input dataset using the given distance
-   * measure, t1, t2 and iteration parameters. All output data will be written
-   * to the output directory, which will be initially deleted if it exists. The
-   * clustered points will reside in the path <output>/clustered-points. By
-   * default, the job expects the a file containing synthetic_control.data as
-   * obtained from
-   * http://archive.ics.uci.edu/ml/datasets/Synthetic+Control+Chart+Time+Series
-   * resides in a directory named "testdata", and writes output to a directory
-   * named "output".
+   * Run the kmeans clustering job on an input dataset using the given distance measure, t1, t2 and iteration
+   * parameters. All output data will be written to the output directory, which will be initially deleted if
+   * it exists. The clustered points will reside in the path <output>/clustered-points. By default, the job
+   * expects that a file containing synthetic_control.data as obtained from
+   * http://archive.ics.uci.edu/ml/datasets/Synthetic+Control+Chart+Time+Series resides in a directory named
+   * "testdata", and writes output to a directory named "output".
    * 
-   * @param input the String denoting the input directory path
-   * @param output the String denoting the output directory path
-   * @param measureClass the String class name of the DistanceMeasure to use
-   * @param t1 the canopy T1 threshold
-   * @param t2 the canopy T2 threshold
-   * @param convergenceDelta the double convergence criteria for iterations
-   * @param maxIterations the int maximum number of iterations
+   * @param input
+   *          the String denoting the input directory path
+   * @param output
+   *          the String denoting the output directory path
+   * @param measureClass
+   *          the String class name of the DistanceMeasure to use
+   * @param t1
+   *          the canopy T1 threshold
+   * @param t2
+   *          the canopy T2 threshold
+   * @param convergenceDelta
+   *          the double convergence criteria for iterations
+   * @param maxIterations
+   *          the int maximum number of iterations
    */
-  private static void runJob(String input, String output, String measureClass,
-      double t1, double t2, double convergenceDelta, int maxIterations) throws IOException {
+  private static void runJob(String input,
+                             String output,
+                             String measureClass,
+                             double t1,
+                             double t2,
+                             double convergenceDelta,
+                             int maxIterations) throws IOException {
     JobClient client = new JobClient();
     JobConf conf = new JobConf(Job.class);
-
+    
     Path outPath = new Path(output);
     client.setConf(conf);
     FileSystem dfs = FileSystem.get(outPath.toUri(), conf);
-    if (dfs.exists(outPath))
+    if (dfs.exists(outPath)) {
       dfs.delete(outPath, true);
-    final String directoryContainingConvertedInput = output
-        + DIRECTORY_CONTAINING_CONVERTED_INPUT;
+    }
+    final String directoryContainingConvertedInput = output + Constants.DIRECTORY_CONTAINING_CONVERTED_INPUT;
     System.out.println("Preparing Input");
-    InputDriver.runJob(input, directoryContainingConvertedInput, "org.apache.mahout.math.RandomAccessSparseVector");
+    InputDriver.runJob(input, directoryContainingConvertedInput,
+      "org.apache.mahout.math.RandomAccessSparseVector");
     System.out.println("Running Canopy to get initial clusters");
-    CanopyDriver.runJob(directoryContainingConvertedInput, output
-        + CanopyClusteringJob.DEFAULT_CANOPIES_OUTPUT_DIRECTORY, measureClass,
-        t1, t2);
+    CanopyDriver.runJob(directoryContainingConvertedInput,
+      output + CanopyClusteringJob.DEFAULT_CANOPIES_OUTPUT_DIRECTORY, measureClass, t1, t2);
     System.out.println("Running KMeans");
-    KMeansDriver.runJob(directoryContainingConvertedInput, output
-        + CanopyClusteringJob.DEFAULT_CANOPIES_OUTPUT_DIRECTORY, output,
-        measureClass, convergenceDelta, maxIterations, 1);
+    KMeansDriver.runJob(directoryContainingConvertedInput,
+      output + CanopyClusteringJob.DEFAULT_CANOPIES_OUTPUT_DIRECTORY, output, measureClass, convergenceDelta,
+      maxIterations, 1);
   }
 }

Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/InputDriver.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/InputDriver.java?rev=909871&r1=909870&r2=909871&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/InputDriver.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/InputDriver.java Sat Feb 13 19:07:36 2010
@@ -40,20 +40,20 @@
 import org.slf4j.LoggerFactory;
 
 public class InputDriver {
-  /**Logger for this class.*/
+  /** Logger for this class. */
   private static final Logger LOG = LoggerFactory.getLogger(InputDriver.class);
-
-  private InputDriver() {
-  }
-
+  
+  private InputDriver() { }
+  
   public static void main(String[] args) throws IOException {
     GroupBuilder gbuilder = new GroupBuilder();
-
+    
     Option inputOpt = DefaultOptionCreator.inputOption().withRequired(false).create();
     Option outputOpt = DefaultOptionCreator.outputOption().withRequired(false).create();
     Option helpOpt = DefaultOptionCreator.helpOption();
-    Group group = gbuilder.withName("Options").withOption(inputOpt).withOption(outputOpt).withOption(helpOpt).create();
-
+    Group group = gbuilder.withName("Options").withOption(inputOpt).withOption(outputOpt).withOption(helpOpt)
+        .create();
+    
     try {
       Parser parser = new Parser();
       parser.setGroup(group);
@@ -62,34 +62,32 @@
         CommandLineUtil.printHelp(group);
         return;
       }
-
+      
       String input = cmdLine.getValue(inputOpt, "testdata").toString();
       String output = cmdLine.getValue(outputOpt, "output").toString();
-      runJob(input, output);
+      InputDriver.runJob(input, output);
     } catch (OptionException e) {
-      LOG.error("Exception parsing command line: ", e);
+      InputDriver.LOG.error("Exception parsing command line: ", e);
       CommandLineUtil.printHelp(group);
     }
   }
-
+  
   public static void runJob(String input, String output) throws IOException {
     JobClient client = new JobClient();
-    JobConf conf = new JobConf(
-        org.apache.mahout.clustering.syntheticcontrol.meanshift.InputDriver.class);
-
+    JobConf conf = new JobConf(org.apache.mahout.clustering.syntheticcontrol.meanshift.InputDriver.class);
+    
     conf.setOutputKeyClass(Text.class);
     conf.setOutputValueClass(MeanShiftCanopy.class);
-
+    
     FileInputFormat.setInputPaths(conf, new Path(input));
     FileOutputFormat.setOutputPath(conf, new Path(output));
     conf.setOutputFormat(SequenceFileOutputFormat.class);
-    conf
-        .setMapperClass(org.apache.mahout.clustering.syntheticcontrol.meanshift.InputMapper.class);
+    conf.setMapperClass(org.apache.mahout.clustering.syntheticcontrol.meanshift.InputMapper.class);
     conf.setReducerClass(Reducer.class);
     conf.setNumReduceTasks(0);
-
+    
     client.setConf(conf);
     JobClient.runJob(conf);
   }
-
+  
 }

Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/InputMapper.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/InputMapper.java?rev=909871&r1=909870&r2=909871&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/InputMapper.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/InputMapper.java Sat Feb 13 19:07:36 2010
@@ -17,6 +17,11 @@
 
 package org.apache.mahout.clustering.syntheticcontrol.meanshift;
 
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.regex.Pattern;
+
 import org.apache.hadoop.io.LongWritable;
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.mapred.MapReduceBase;
@@ -27,33 +32,31 @@
 import org.apache.mahout.math.DenseVector;
 import org.apache.mahout.math.Vector;
 
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.regex.Pattern;
-
-public class InputMapper extends MapReduceBase implements
-    Mapper<LongWritable, Text, Text, MeanShiftCanopy> {
-
+public class InputMapper extends MapReduceBase implements Mapper<LongWritable,Text,Text,MeanShiftCanopy> {
+  
   private static final Pattern SPACE = Pattern.compile(" ");
   private int nextCanopyId = 0;
   
   @Override
-  public void map(LongWritable key, Text values,
-      OutputCollector<Text, MeanShiftCanopy> output, Reporter reporter) throws IOException {
-    String[] numbers = SPACE.split(values.toString());
+  public void map(LongWritable key,
+                  Text values,
+                  OutputCollector<Text,MeanShiftCanopy> output,
+                  Reporter reporter) throws IOException {
+    String[] numbers = InputMapper.SPACE.split(values.toString());
     // sometimes there are multiple separator spaces
     List<Double> doubles = new ArrayList<Double>();
     for (String value : numbers) {
-      if (value.length() > 0)
+      if (value.length() > 0) {
         doubles.add(Double.valueOf(value));
+      }
     }
     Vector point = new DenseVector(doubles.size());
     int index = 0;
-    for (Double d : doubles)
+    for (Double d : doubles) {
       point.set(index++, d);
+    }
     MeanShiftCanopy canopy = new MeanShiftCanopy(point, nextCanopyId++);
     output.collect(new Text(), canopy);
   }
-
+  
 }

Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/Job.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/Job.java?rev=909871&r1=909870&r2=909871&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/Job.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/Job.java Sat Feb 13 19:07:36 2010
@@ -42,40 +42,38 @@
 public class Job {
   /** Logger for this class. */
   private static final Logger LOG = LoggerFactory.getLogger(Job.class);
-
+  
   private static final String CLUSTERED_POINTS_OUTPUT_DIRECTORY = "/clusteredPoints";
-
-  private Job() {
-  }
-
+  
+  private Job() { }
+  
   public static void main(String[] args) throws IOException {
     DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
     ArgumentBuilder abuilder = new ArgumentBuilder();
     GroupBuilder gbuilder = new GroupBuilder();
-
+    
     Option inputOpt = DefaultOptionCreator.inputOption().withRequired(false).create();
     Option outputOpt = DefaultOptionCreator.outputOption().withRequired(false).create();
     Option convergenceDeltaOpt = DefaultOptionCreator.convergenceOption().withRequired(false).create();
     Option maxIterOpt = DefaultOptionCreator.maxIterOption().withRequired(false).create();
-    Option helpOpt = DefaultOptionCreator.helpOption();    
-
-    Option modelOpt = obuilder.withLongName("distanceClass").withRequired(false).withShortName("d").
-        withArgument(abuilder.withName("distanceClass").withMinimum(1).withMaximum(1).create()).
-        withDescription("The distance measure class name.").create();
-
-
-    Option threshold1Opt = obuilder.withLongName("threshold_1").withRequired(false).withShortName("t1").
-        withArgument(abuilder.withName("threshold_1").withMinimum(1).withMaximum(1).create()).
-        withDescription("The T1 distance threshold.").create();
-
-    Option threshold2Opt = obuilder.withLongName("threshold_2").withRequired(false).withShortName("t2").
-        withArgument(abuilder.withName("threshold_2").withMinimum(1).withMaximum(1).create()).
-        withDescription("The T1 distance threshold.").create();
-
-    Group group = gbuilder.withName("Options").withOption(inputOpt).withOption(outputOpt).withOption(modelOpt).
-        withOption(helpOpt).withOption(convergenceDeltaOpt).withOption(threshold1Opt).withOption(maxIterOpt).
-        withOption(threshold2Opt).create();
-
+    Option helpOpt = DefaultOptionCreator.helpOption();
+    
+    Option modelOpt = obuilder.withLongName("distanceClass").withRequired(false).withShortName("d")
+        .withArgument(abuilder.withName("distanceClass").withMinimum(1).withMaximum(1).create())
+        .withDescription("The distance measure class name.").create();
+    
+    Option threshold1Opt = obuilder.withLongName("threshold_1").withRequired(false).withShortName("t1")
+        .withArgument(abuilder.withName("threshold_1").withMinimum(1).withMaximum(1).create())
+        .withDescription("The T1 distance threshold.").create();
+    
+    Option threshold2Opt = obuilder.withLongName("threshold_2").withRequired(false).withShortName("t2")
+        .withArgument(abuilder.withName("threshold_2").withMinimum(1).withMaximum(1).create())
+        .withDescription("The T2 distance threshold.").create();
+    
+    Group group = gbuilder.withName("Options").withOption(inputOpt).withOption(outputOpt)
+        .withOption(modelOpt).withOption(helpOpt).withOption(convergenceDeltaOpt).withOption(threshold1Opt)
+        .withOption(maxIterOpt).withOption(threshold2Opt).create();
+    
     try {
       Parser parser = new Parser();
       parser.setGroup(group);
@@ -84,47 +82,55 @@
         CommandLineUtil.printHelp(group);
         return;
       }
-
+      
       String input = cmdLine.getValue(inputOpt, "testdata").toString();
       String output = cmdLine.getValue(outputOpt, "output").toString();
-      String measureClassName = cmdLine.getValue(modelOpt, "org.apache.mahout.common.distance.EuclideanDistanceMeasure").toString();
+      String measureClassName = cmdLine.getValue(modelOpt,
+        "org.apache.mahout.common.distance.EuclideanDistanceMeasure").toString();
       double t1 = Double.parseDouble(cmdLine.getValue(threshold1Opt, "47.6").toString());
       double t2 = Double.parseDouble(cmdLine.getValue(threshold2Opt, "1").toString());
       double convergenceDelta = Double.parseDouble(cmdLine.getValue(convergenceDeltaOpt, "0.5").toString());
       int maxIterations = Integer.parseInt(cmdLine.getValue(maxIterOpt, "10").toString());
-      runJob(input, output, measureClassName, t1, t2, convergenceDelta,
-          maxIterations);
+      Job.runJob(input, output, measureClassName, t1, t2, convergenceDelta, maxIterations);
     } catch (OptionException e) {
-      LOG.error("Exception parsing command line: ", e);
+      Job.LOG.error("Exception parsing command line: ", e);
       CommandLineUtil.printHelp(group);
     }
   }
-
+  
   /**
-   * Run the meanshift clustering job on an input dataset using the given
-   * distance measure, t1, t2 and iteration parameters. All output data will be
-   * written to the output directory, which will be initially deleted if it
-   * exists. The clustered points will reside in the path
-   * <output>/clustered-points. By default, the job expects the a file
-   * containing synthetic_control.data as obtained from
-   * http://archive.ics.uci.edu/ml/datasets/Synthetic+Control+Chart+Time+Series
-   * resides in a directory named "testdata", and writes output to a directory
-   * named "output".
+   * Run the meanshift clustering job on an input dataset using the given distance measure, t1, t2 and
+   * iteration parameters. All output data will be written to the output directory, which will be initially
+   * deleted if it exists. The clustered points will reside in the path <output>/clusteredPoints. By default,
+   * the job expects that a file containing synthetic_control.data as obtained from
+   * http://archive.ics.uci.edu/ml/datasets/Synthetic+Control+Chart+Time+Series resides in a directory named
+   * "testdata", and writes output to a directory named "output".
    * 
-   * @param input the String denoting the input directory path
-   * @param output the String denoting the output directory path
-   * @param measureClassName the String class name of the DistanceMeasure to use
-   * @param t1 the meanshift canopy T1 threshold
-   * @param t2 the meanshift canopy T2 threshold
-   * @param convergenceDelta the double convergence criteria for iterations
-   * @param maxIterations the int maximum number of iterations
+   * @param input
+   *          the String denoting the input directory path
+   * @param output
+   *          the String denoting the output directory path
+   * @param measureClassName
+   *          the String class name of the DistanceMeasure to use
+   * @param t1
+   *          the meanshift canopy T1 threshold
+   * @param t2
+   *          the meanshift canopy T2 threshold
+   * @param convergenceDelta
+   *          the double convergence criteria for iterations
+   * @param maxIterations
+   *          the int maximum number of iterations
    */
-  private static void runJob(String input, String output,
-      String measureClassName, double t1, double t2, double convergenceDelta,
-      int maxIterations) throws IOException {
+  private static void runJob(String input,
+                             String output,
+                             String measureClassName,
+                             double t1,
+                             double t2,
+                             double convergenceDelta,
+                             int maxIterations) throws IOException {
     JobClient client = new JobClient();
     JobConf conf = new JobConf(Job.class);
-
+    
     Path outPath = new Path(output);
     client.setConf(conf);
     FileSystem dfs = FileSystem.get(outPath.toUri(), conf);
@@ -133,11 +139,11 @@
     }
     String directoryContainingConvertedInput = output + Constants.DIRECTORY_CONTAINING_CONVERTED_INPUT;
     InputDriver.runJob(input, directoryContainingConvertedInput);
-    MeanShiftCanopyJob.runJob(directoryContainingConvertedInput, output + "/meanshift",
-        measureClassName, t1, t2, convergenceDelta, maxIterations);
+    MeanShiftCanopyJob.runJob(directoryContainingConvertedInput, output + "/meanshift", measureClassName, t1,
+      t2, convergenceDelta, maxIterations);
     FileStatus[] status = dfs.listStatus(new Path(output + "/meanshift"));
     OutputDriver.runJob(status[status.length - 1].getPath().toString(),
-        output + CLUSTERED_POINTS_OUTPUT_DIRECTORY);
+      output + Job.CLUSTERED_POINTS_OUTPUT_DIRECTORY);
   }
-
+  
 }

Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/OutputDriver.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/OutputDriver.java?rev=909871&r1=909870&r2=909871&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/OutputDriver.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/OutputDriver.java Sat Feb 13 19:07:36 2010
@@ -41,18 +41,18 @@
 public class OutputDriver {
   /** Logger for this class. */
   private static final Logger LOG = LoggerFactory.getLogger(OutputDriver.class);
-
-  private OutputDriver() {
-  }
-
+  
+  private OutputDriver() { }
+  
   public static void main(String[] args) throws IOException {
     GroupBuilder gbuilder = new GroupBuilder();
-
+    
     Option inputOpt = DefaultOptionCreator.inputOption().withRequired(false).create();
     Option outputOpt = DefaultOptionCreator.outputOption().withRequired(false).create();
     Option helpOpt = DefaultOptionCreator.helpOption();
-    Group group = gbuilder.withName("Options").withOption(inputOpt).withOption(outputOpt).withOption(helpOpt).create();
-
+    Group group = gbuilder.withName("Options").withOption(inputOpt).withOption(outputOpt).withOption(helpOpt)
+        .create();
+    
     try {
       Parser parser = new Parser();
       parser.setGroup(group);
@@ -61,35 +61,34 @@
         CommandLineUtil.printHelp(group);
         return;
       }
-
+      
       String input = cmdLine.getValue(inputOpt, "testdata").toString();
       String output = cmdLine.getValue(outputOpt, "output").toString();
-      runJob(input, output);
+      OutputDriver.runJob(input, output);
     } catch (OptionException e) {
-      LOG.error("Exception parsing command line: ", e);
+      OutputDriver.LOG.error("Exception parsing command line: ", e);
       CommandLineUtil.printHelp(group);
     }
   }
-
+  
   public static void runJob(String input, String output) throws IOException {
     JobClient client = new JobClient();
-    JobConf conf = new JobConf(
-        org.apache.mahout.clustering.syntheticcontrol.meanshift.OutputDriver.class);
-
+    JobConf conf = new JobConf(org.apache.mahout.clustering.syntheticcontrol.meanshift.OutputDriver.class);
+    
     conf.setOutputKeyClass(Text.class);
     conf.setOutputValueClass(Text.class);
     conf.setInputFormat(SequenceFileInputFormat.class);
-
+    
     FileInputFormat.setInputPaths(conf, new Path(input));
     FileOutputFormat.setOutputPath(conf, new Path(output));
-
+    
     conf.setMapperClass(OutputMapper.class);
-
+    
     conf.setReducerClass(Reducer.class);
     conf.setNumReduceTasks(0);
-
+    
     client.setConf(conf);
     JobClient.runJob(conf);
   }
-
+  
 }

Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/OutputMapper.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/OutputMapper.java?rev=909871&r1=909870&r2=909871&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/OutputMapper.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/OutputMapper.java Sat Feb 13 19:07:36 2010
@@ -17,6 +17,8 @@
 
 package org.apache.mahout.clustering.syntheticcontrol.meanshift;
 
+import java.io.IOException;
+
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.mapred.MapReduceBase;
 import org.apache.hadoop.mapred.Mapper;
@@ -27,27 +29,24 @@
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-import java.io.IOException;
-
-public class OutputMapper extends MapReduceBase implements
-    Mapper<Text, MeanShiftCanopy, Text, Text> {
-
+public class OutputMapper extends MapReduceBase implements Mapper<Text,MeanShiftCanopy,Text,Text> {
+  
   private static final Logger log = LoggerFactory.getLogger(OutputMapper.class);
-
+  
   private int clusters = 0;
-
+  
   @Override
-  public void map(Text key, MeanShiftCanopy canopy, OutputCollector<Text, Text> output,
-      Reporter reporter) throws IOException {
+  public void map(Text key, MeanShiftCanopy canopy, OutputCollector<Text,Text> output, Reporter reporter) throws IOException {
     clusters++;
-    for (Vector point : canopy.getBoundPoints())
+    for (Vector point : canopy.getBoundPoints()) {
       output.collect(key, new Text(point.asFormatString()));
+    }
   }
-
+  
   @Override
   public void close() throws IOException {
-    log.info("+++ Clusters={}", clusters);
+    OutputMapper.log.info("+++ Clusters={}", clusters);
     super.close();
   }
-
+  
 }