You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by sr...@apache.org on 2009/07/10 11:35:28 UTC
svn commit: r792856 [8/13] - in /lucene/mahout/trunk/core/src: main/java/org/apache/mahout/cf/taste/common/ main/java/org/apache/mahout/cf/taste/eval/ main/java/org/apache/mahout/cf/taste/hadoop/ main/java/org/apache/mahout/cf/taste/impl/common/ main/j...

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/UncommonDistributions.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/UncommonDistributions.java?rev=792856&r1=792855&r2=792856&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/UncommonDistributions.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/UncommonDistributions.java Fri Jul 10 09:35:19 2009
@@ -17,13 +17,13 @@
  * limitations under the License.
  */
 
-import java.util.Random;
-
 import org.apache.mahout.matrix.DenseVector;
 import org.apache.mahout.matrix.Vector;
 import org.uncommons.maths.random.GaussianGenerator;
 import org.uncommons.maths.random.MersenneTwisterRNG;
 
+import java.util.Random;
+
 public final class UncommonDistributions {
 
   private static final double sqrt2pi = Math.sqrt(2.0 * Math.PI);
@@ -38,12 +38,10 @@
   }
 
   //=============== start of BSD licensed code. See LICENSE.txt
-  /** 
-   * Returns a double sampled according to this distribution.  Uniformly
-   * fast for all k > 0.  (Reference: Non-Uniform Random Variate Generation,
-   * Devroye http://cgm.cs.mcgill.ca/~luc/rnbookindex.html)  Uses Cheng's
-   * rejection algorithm (GB) for k>=1, rejection from Weibull distribution
-   * for 0 < k < 1.
+  /**
+   * Returns a double sampled according to this distribution.  Uniformly fast for all k > 0.  (Reference: Non-Uniform
+   * Random Variate Generation, Devroye http://cgm.cs.mcgill.ca/~luc/rnbookindex.html)  Uses Cheng's rejection algorithm
+   * (GB) for k>=1, rejection from Weibull distribution for 0 < k < 1.
    */
   public static double rGamma(double k, double lambda) {
     boolean accept = false;
@@ -88,9 +86,8 @@
   //============= end of BSD licensed code
 
   /**
-   * Returns a random sample from a beta distribution with
-   * the given shapes
-   * 
+   * Returns a random sample from a beta distribution with the given shapes
+   *
    * @param shape1 a double representing shape1
    * @param shape2 a double representing shape2
    * @return a Vector of samples
@@ -103,10 +100,9 @@
   }
 
   /**
-   * Returns a vector of random samples from a beta distribution with
-   * the given shapes
-   * 
-   * @param K the number of samples to return
+   * Returns a vector of random samples from a beta distribution with the given shapes
+   *
+   * @param K      the number of samples to return
    * @param shape1 a double representing shape1
    * @param shape2 a double representing shape2
    * @return a Vector of samples
@@ -116,15 +112,15 @@
     //params.add(shape1);
     //params.add(Math.max(0, shape2));
     Vector result = new DenseVector(K);
-    for (int i = 0; i < K; i++)
+    for (int i = 0; i < K; i++) {
       result.set(i, rBeta(shape1, shape2));
+    }
     return result;
   }
 
   /**
-   * Return a random sample from the chi-squared (chi^2) distribution with df 
-   * degrees of freedom.
-   * @param df
+   * Return a random sample from the chi-squared (chi^2) distribution with df degrees of freedom.
+   *
    * @return a double sample
    */
   public static double rChisq(double df) {
@@ -137,11 +133,10 @@
   }
 
   /**
-   * Return a random value from a normal distribution with the given mean and
-   * standard deviation
-   * 
+   * Return a random value from a normal distribution with the given mean and standard deviation
+   *
    * @param mean a double mean value
-   * @param sd a double standard deviation
+   * @param sd   a double standard deviation
    * @return a double sample
    */
   public static double rNorm(double mean, double sd) {
@@ -151,10 +146,9 @@
 
   /**
    * Return the normal density function value for the sample x
-   * 
+   *
    * pdf = 1/[sqrt(2*p)*s] * e^{-1/2*[(x-m)/s]^2}
-   *  
-   * 
+   *
    * @param x a double sample value
    * @param m a double mean value
    * @param s a double standard deviation
@@ -167,9 +161,7 @@
     return exp / (sqrt2pi * s);
   }
 
-  /**
-  * Returns one sample from a multinomial.
-  */
+  /** Returns one sample from a multinomial. */
   public static int rMultinom(Vector probabilities) {
     // our probability argument are not normalized.
     double total = probabilities.zSum();
@@ -189,19 +181,15 @@
 
   /**
    * Returns a multinomial vector sampled from the given probabilities
-   * 
+   *
    * rmultinom should be implemented as successive binomial sampling.
    *
-   *Keep a normalizing amount that starts with 1 (I call it total).
+   * Keep a normalizing amount that starts with 1 (I call it total).
+   *
+   * For each i k[i] = rbinom(p[i] / total, size); total -= p[i]; size -= k[i];
    *
-   * For each i
-   *  k[i] = rbinom(p[i] / total, size);
-   *  total -= p[i];
-   *  size -= k[i];
-   * 
-   * @param size the size parameter of the binomial distribution
+   * @param size          the size parameter of the binomial distribution
    * @param probabilities a Vector of probabilities
-   * 
    * @return a multinomial distribution Vector
    */
   public static Vector rMultinom(int size, Vector probabilities) {
@@ -220,14 +208,14 @@
   }
 
   /**
-   * Returns an integer sampled according to this distribution.  Takes time
-   * proprotional to np + 1.  (Reference: Non-Uniform Random Variate 
-   * Generation, Devroye http://cgm.cs.mcgill.ca/~luc/rnbookindex.html) 
-   * Second time-waiting algorithm.
+   * Returns an integer sampled according to this distribution.  Takes time proportional to np + 1.  (Reference:
+   * Non-Uniform Random Variate Generation, Devroye http://cgm.cs.mcgill.ca/~luc/rnbookindex.html) Second time-waiting
+   * algorithm.
    */
   public static int rBinomial(int n, double p) {
-    if (p >= 1)
+    if (p >= 1) {
       return n; // needed to avoid infinite loops and negative results
+    }
     double q = -Math.log(1 - p);
     double sum = 0;
     int x = 0;
@@ -237,15 +225,18 @@
       sum += (e / (n - x));
       x += 1;
     }
-    if (x == 0)
-      return 0;
+    if (x == 0) {
+      {
+        return 0;
+      }
+    }
     return x - 1;
   }
 
   /**
-   * Sample from a Dirichlet distribution over the given alpha, 
-   * returning a vector of probabilities using a stick-breaking algorithm
-   * 
+   * Sample from a Dirichlet distribution over the given alpha, returning a vector of probabilities using a
+   * stick-breaking algorithm
+   *
    * @param alpha an unnormalized count Vector
    * @return a Vector of probabilities
    */

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/AsymmetricSampledNormalDistribution.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/AsymmetricSampledNormalDistribution.java?rev=792856&r1=792855&r2=792856&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/AsymmetricSampledNormalDistribution.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/AsymmetricSampledNormalDistribution.java Fri Jul 10 09:35:19 2009
@@ -22,10 +22,9 @@
 import org.apache.mahout.matrix.Vector;
 
 /**
- * An implementation of the ModelDistribution interface suitable for testing the
- * DirichletCluster algorithm. Uses a Normal Distribution to sample the prior
- * model values. Model values have a vector standard deviation, allowing assymetrical
- * regions to be covered by a model.
+ * An implementation of the ModelDistribution interface suitable for testing the DirichletCluster algorithm. Uses a
+ * Normal Distribution to sample the prior model values. Model values have a vector standard deviation, allowing
+ * asymmetrical regions to be covered by a model.
  */
 public class AsymmetricSampledNormalDistribution implements
     ModelDistribution<Vector> {
@@ -34,11 +33,11 @@
   public Model<Vector>[] sampleFromPrior(int howMany) {
     Model<Vector>[] result = new AsymmetricSampledNormalModel[howMany];
     for (int i = 0; i < howMany; i++) {
-      double[] m = { UncommonDistributions.rNorm(0, 1),
-          UncommonDistributions.rNorm(0, 1) };
+      double[] m = {UncommonDistributions.rNorm(0, 1),
+          UncommonDistributions.rNorm(0, 1)};
       DenseVector mean = new DenseVector(m);
-      double[] s = { UncommonDistributions.rNorm(1, 1),
-          UncommonDistributions.rNorm(1, 1) };
+      double[] s = {UncommonDistributions.rNorm(1, 1),
+          UncommonDistributions.rNorm(1, 1)};
       DenseVector sd = new DenseVector(s);
       result[i] = new AsymmetricSampledNormalModel(mean, sd);
     }

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/AsymmetricSampledNormalModel.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/AsymmetricSampledNormalModel.java?rev=792856&r1=792855&r2=792856&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/AsymmetricSampledNormalModel.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/AsymmetricSampledNormalModel.java Fri Jul 10 09:35:19 2009
@@ -16,14 +16,14 @@
  */
 package org.apache.mahout.clustering.dirichlet.models;
 
-import java.io.DataInput;
-import java.io.DataOutput;
-import java.io.IOException;
-
 import org.apache.mahout.matrix.AbstractVector;
 import org.apache.mahout.matrix.SquareRootFunction;
 import org.apache.mahout.matrix.Vector;
 
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+
 public class AsymmetricSampledNormalModel implements Model<Vector> {
 
   private static final double sqrt2pi = Math.sqrt(2.0 * Math.PI);
@@ -55,7 +55,7 @@
 
   /**
    * Return an instance with the same parameters
-   * 
+   *
    * @return an AsymmetricSampledNormalModel
    */
   AsymmetricSampledNormalModel sample() {
@@ -65,20 +65,23 @@
   @Override
   public void observe(Vector x) {
     s0++;
-    if (s1 == null)
+    if (s1 == null) {
       s1 = x.clone();
-    else
+    } else {
       s1 = s1.plus(x);
-    if (s2 == null)
+    }
+    if (s2 == null) {
       s2 = x.times(x);
-    else
+    } else {
       s2 = s2.plus(x.times(x));
+    }
   }
 
   @Override
   public void computeParameters() {
-    if (s0 == 0)
+    if (s0 == 0) {
       return;
+    }
     mean = s1.divide(s0);
     // compute the two component stds
     if (s0 > 1) {
@@ -91,10 +94,9 @@
 
   /**
    * Calculate a pdf using the supplied sample and sd
-   * 
-   * @param x a Vector sample
+   *
+   * @param x  a Vector sample
    * @param sd a double std deviation
-   * @return
    */
   private double pdf(Vector x, double sd) {
     assert x.getNumNondefaultElements() == 2;
@@ -124,13 +126,17 @@
   public String toString() {
     StringBuilder buf = new StringBuilder();
     buf.append("asnm{n=").append(s0).append(" m=[");
-    if (mean != null)
-      for (int i = 0; i < mean.size(); i++)
+    if (mean != null) {
+      for (int i = 0; i < mean.size(); i++) {
         buf.append(String.format("%.2f", mean.get(i))).append(", ");
+      }
+    }
     buf.append("] sd=[");
-    if (sd != null)
-      for (int i = 0; i < sd.size(); i++)
+    if (sd != null) {
+      for (int i = 0; i < sd.size(); i++) {
         buf.append(String.format("%.2f", sd.get(i))).append(", ");
+      }
+    }
     buf.append("]}");
     return buf.toString();
   }

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/Model.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/Model.java?rev=792856&r1=792855&r2=792856&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/Model.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/Model.java Fri Jul 10 09:35:19 2009
@@ -20,35 +20,32 @@
  */
 
 /**
- * A model is a probability distribution over observed data points and allows 
- * the probability of any data point to be computed.
+ * A model is a probability distribution over observed data points and allows the probability of any data point to be
+ * computed.
  */
 public interface Model<O> extends Writable {
 
   /**
    * Observe the given observation, retaining information about it
-   * 
+   *
    * @param x an Observation from the posterior
    */
   void observe(O x);
 
-  /**
-   * Compute a new set of posterior parameters based upon the Observations 
-   * that have been observed since my creation
-   */
+  /** Compute a new set of posterior parameters based upon the Observations that have been observed since my creation */
   void computeParameters();
 
   /**
-  * Return the probability that the observation is described by this model
-  * 
-  * @param x an Observation from the posterior
-  * @return the probability that x is in the receiver
-  */
+   * Return the probability that the observation is described by this model
+   *
+   * @param x an Observation from the posterior
+   * @return the probability that x is in the receiver
+   */
   double pdf(O x);
 
   /**
    * Return the number of observations that have been observed by this model
-   * 
+   *
    * @return an int
    */
   int count();

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/ModelDistribution.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/ModelDistribution.java?rev=792856&r1=792855&r2=792856&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/ModelDistribution.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/ModelDistribution.java Fri Jul 10 09:35:19 2009
@@ -17,14 +17,12 @@
  * limitations under the License.
  */
 
-/**
- * A model distribution allows us to sample a model from its prior distribution.
- */
+/** A model distribution allows us to sample a model from its prior distribution. */
 public interface ModelDistribution<O> {
 
   /**
    * Return a list of models sampled from the prior
-   * 
+   *
    * @param howMany the int number of models to return
    * @return a Model<Observation>[] representing what is known apriori
    */
@@ -32,7 +30,7 @@
 
   /**
    * Return a list of models sampled from the posterior
-   * 
+   *
    * @param posterior the Model<Observation>[] after observations
    * @return a Model<Observation>[] representing what is known apriori
    */

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/NormalModel.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/NormalModel.java?rev=792856&r1=792855&r2=792856&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/NormalModel.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/NormalModel.java Fri Jul 10 09:35:19 2009
@@ -16,14 +16,14 @@
  */
 package org.apache.mahout.clustering.dirichlet.models;
 
-import java.io.DataInput;
-import java.io.DataOutput;
-import java.io.IOException;
-
 import org.apache.mahout.matrix.AbstractVector;
 import org.apache.mahout.matrix.SquareRootFunction;
 import org.apache.mahout.matrix.Vector;
 
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+
 public class NormalModel implements Model<Vector> {
 
   private static final double sqrt2pi = Math.sqrt(2.0 * Math.PI);
@@ -52,9 +52,8 @@
   }
 
   /**
-   * TODO: Return a proper sample from the posterior. For now, return an 
-   * instance with the same parameters
-   * 
+   * TODO: Return a proper sample from the posterior. For now, return an instance with the same parameters
+   *
    * @return an NormalModel
    */
   public NormalModel sample() {
@@ -64,28 +63,32 @@
   @Override
   public void observe(Vector x) {
     s0++;
-    if (s1 == null)
+    if (s1 == null) {
       s1 = x.clone();
-    else
+    } else {
       s1 = s1.plus(x);
-    if (s2 == null)
+    }
+    if (s2 == null) {
       s2 = x.times(x);
-    else
+    } else {
       s2 = s2.plus(x.times(x));
+    }
   }
 
   @Override
   public void computeParameters() {
-    if (s0 == 0)
+    if (s0 == 0) {
       return;
+    }
     mean = s1.divide(s0);
     // compute the average of the component stds
     if (s0 > 1) {
       Vector std = s2.times(s0).minus(s1.times(s1)).assign(
           new SquareRootFunction()).divide(s0);
       sd = std.zSum() / s1.size();
-    } else
+    } else {
       sd = Double.MIN_VALUE;
+    }
   }
 
   @Override
@@ -105,9 +108,11 @@
   public String toString() {
     StringBuilder buf = new StringBuilder();
     buf.append("nm{n=").append(s0).append(" m=[");
-    if (mean != null)
-      for (int i = 0; i < mean.size(); i++)
+    if (mean != null) {
+      for (int i = 0; i < mean.size(); i++) {
         buf.append(String.format("%.2f", mean.get(i))).append(", ");
+      }
+    }
     buf.append("] sd=").append(String.format("%.2f", sd)).append('}');
     return buf.toString();
   }
@@ -127,6 +132,6 @@
     out.writeDouble(sd);
     out.writeInt(s0);
     AbstractVector.writeVector(out, s1);
-    AbstractVector.writeVector(out, s2);    
+    AbstractVector.writeVector(out, s2);
   }
 }

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/NormalModelDistribution.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/NormalModelDistribution.java?rev=792856&r1=792855&r2=792856&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/NormalModelDistribution.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/NormalModelDistribution.java Fri Jul 10 09:35:19 2009
@@ -21,16 +21,17 @@
 import org.apache.mahout.matrix.Vector;
 
 /**
- * An implementation of the ModelDistribution interface suitable for testing the
- * DirichletCluster algorithm. Uses a Normal Distribution
+ * An implementation of the ModelDistribution interface suitable for testing the DirichletCluster algorithm. Uses a
+ * Normal Distribution
  */
 public class NormalModelDistribution implements ModelDistribution<Vector> {
 
   @Override
   public Model<Vector>[] sampleFromPrior(int howMany) {
     Model<Vector>[] result = new NormalModel[howMany];
-    for (int i = 0; i < howMany; i++)
+    for (int i = 0; i < howMany; i++) {
       result[i] = new NormalModel(new DenseVector(2), 1);
+    }
     return result;
   }
 

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/SampledNormalDistribution.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/SampledNormalDistribution.java?rev=792856&r1=792855&r2=792856&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/SampledNormalDistribution.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/SampledNormalDistribution.java Fri Jul 10 09:35:19 2009
@@ -22,9 +22,8 @@
 import org.apache.mahout.matrix.Vector;
 
 /**
- * An implementation of the ModelDistribution interface suitable for testing the
- * DirichletCluster algorithm. Uses a Normal Distribution to sample the prior
- * model values.
+ * An implementation of the ModelDistribution interface suitable for testing the DirichletCluster algorithm. Uses a
+ * Normal Distribution to sample the prior model values.
  */
 public class SampledNormalDistribution extends NormalModelDistribution {
 
@@ -32,8 +31,8 @@
   public Model<Vector>[] sampleFromPrior(int howMany) {
     Model<Vector>[] result = new SampledNormalModel[howMany];
     for (int i = 0; i < howMany; i++) {
-      double[] m = { UncommonDistributions.rNorm(0, 1),
-          UncommonDistributions.rNorm(0, 1) };
+      double[] m = {UncommonDistributions.rNorm(0, 1),
+          UncommonDistributions.rNorm(0, 1)};
       DenseVector mean = new DenseVector(m);
       result[i] = new SampledNormalModel(mean, 1);
     }

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/SampledNormalModel.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/SampledNormalModel.java?rev=792856&r1=792855&r2=792856&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/SampledNormalModel.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/SampledNormalModel.java Fri Jul 10 09:35:19 2009
@@ -32,15 +32,18 @@
   public String toString() {
     StringBuilder buf = new StringBuilder();
     buf.append("snm{n=").append(s0).append(" m=[");
-    if (mean != null)
-      for (int i = 0; i < mean.size(); i++)
+    if (mean != null) {
+      for (int i = 0; i < mean.size(); i++) {
         buf.append(String.format("%.2f", mean.get(i))).append(", ");
+      }
+    }
     buf.append("] sd=").append(String.format("%.2f", sd)).append('}');
     return buf.toString();
   }
 
   /**
    * Return an instance with the same parameters
+   *
    * @return an SampledNormalModel
    */
   @Override

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansClusterMapper.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansClusterMapper.java?rev=792856&r1=792855&r2=792856&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansClusterMapper.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansClusterMapper.java Fri Jul 10 09:35:19 2009
@@ -17,26 +17,26 @@
 
 package org.apache.mahout.clustering.fuzzykmeans;
 
-import java.io.IOException;
-import java.util.List;
-import java.util.ArrayList;
-
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.io.WritableComparable;
-import org.apache.hadoop.mapred.OutputCollector;
-import org.apache.hadoop.mapred.Reporter;
+import org.apache.hadoop.mapred.JobConf;
 import org.apache.hadoop.mapred.MapReduceBase;
 import org.apache.hadoop.mapred.Mapper;
-import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.hadoop.mapred.Reporter;
 import org.apache.mahout.matrix.Vector;
 
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
 public class FuzzyKMeansClusterMapper extends MapReduceBase implements
-        Mapper<WritableComparable<?>, Vector, Text, FuzzyKMeansOutput> {
+    Mapper<WritableComparable<?>, Vector, Text, FuzzyKMeansOutput> {
   protected List<SoftCluster> clusters;
+
   @Override
   public void map(WritableComparable<?> key, Vector point,
-      OutputCollector<Text, FuzzyKMeansOutput> output, Reporter reporter) throws IOException
-  {
+                  OutputCollector<Text, FuzzyKMeansOutput> output, Reporter reporter) throws IOException {
     SoftCluster.outputPointWithClusterProbabilities(key.toString(), point, clusters, output);
   }
 
@@ -59,8 +59,9 @@
     FuzzyKMeansUtil.configureWithClusterInfo(job
         .get(SoftCluster.CLUSTER_PATH_KEY), clusters);
 
-    if (clusters.isEmpty())
+    if (clusters.isEmpty()) {
       throw new NullPointerException("Cluster is empty!!!");
+    }
   }
 
 }

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansCombiner.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansCombiner.java?rev=792856&r1=792855&r2=792856&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansCombiner.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansCombiner.java Fri Jul 10 09:35:19 2009
@@ -17,9 +17,6 @@
 
 package org.apache.mahout.clustering.fuzzykmeans;
 
-import java.io.IOException;
-import java.util.Iterator;
-
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.mapred.JobConf;
 import org.apache.hadoop.mapred.MapReduceBase;
@@ -27,12 +24,15 @@
 import org.apache.hadoop.mapred.Reducer;
 import org.apache.hadoop.mapred.Reporter;
 
+import java.io.IOException;
+import java.util.Iterator;
+
 public class FuzzyKMeansCombiner extends MapReduceBase implements
     Reducer<Text, FuzzyKMeansInfo, Text, FuzzyKMeansInfo> {
 
   @Override
   public void reduce(Text key, Iterator<FuzzyKMeansInfo> values,
-      OutputCollector<Text, FuzzyKMeansInfo> output, Reporter reporter) throws IOException {
+                     OutputCollector<Text, FuzzyKMeansInfo> output, Reporter reporter) throws IOException {
     SoftCluster cluster = new SoftCluster(key.toString().trim());
     while (values.hasNext()) {
       //String pointInfo = values.next().toString();

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansDriver.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansDriver.java?rev=792856&r1=792855&r2=792856&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansDriver.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansDriver.java Fri Jul 10 09:35:19 2009
@@ -25,6 +25,7 @@
 import org.apache.commons.cli2.builder.DefaultOptionBuilder;
 import org.apache.commons.cli2.builder.GroupBuilder;
 import org.apache.commons.cli2.commandline.Parser;
+import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.FileUtil;
@@ -38,7 +39,6 @@
 import org.apache.hadoop.mapred.JobConf;
 import org.apache.hadoop.mapred.SequenceFileInputFormat;
 import org.apache.hadoop.mapred.SequenceFileOutputFormat;
-import org.apache.hadoop.conf.Configuration;
 import org.apache.mahout.clustering.kmeans.RandomSeedGenerator;
 import org.apache.mahout.matrix.SparseVector;
 import org.apache.mahout.matrix.Vector;
@@ -56,7 +56,7 @@
 public class FuzzyKMeansDriver {
 
   private static final Logger log = LoggerFactory
-          .getLogger(FuzzyKMeansDriver.class);
+      .getLogger(FuzzyKMeansDriver.class);
 
 
   private FuzzyKMeansDriver() {
@@ -68,65 +68,65 @@
     ArgumentBuilder abuilder = new ArgumentBuilder();
     GroupBuilder gbuilder = new GroupBuilder();
     Option inputOpt = obuilder.withLongName("input").withRequired(true).withArgument(
-            abuilder.withName("input").withMinimum(1).withMaximum(1).create()).
-            withDescription("The Path for input Vectors. Must be a SequenceFile of Writable, Vector").withShortName("i").create();
+        abuilder.withName("input").withMinimum(1).withMaximum(1).create()).
+        withDescription("The Path for input Vectors. Must be a SequenceFile of Writable, Vector").withShortName("i").create();
 
     Option clustersOpt = obuilder.withLongName("clusters").withRequired(true).withArgument(
-            abuilder.withName("clusters").withMinimum(1).withMaximum(1).create()).
-            withDescription("The input centroids, as Vectors.  Must be a SequenceFile of Writable, Cluster/Canopy.  " +
-                    "If k is also specified, then a random set of vectors will be selected and written out to this path first").withShortName("c").create();
+        abuilder.withName("clusters").withMinimum(1).withMaximum(1).create()).
+        withDescription("The input centroids, as Vectors.  Must be a SequenceFile of Writable, Cluster/Canopy.  " +
+            "If k is also specified, then a random set of vectors will be selected and written out to this path first").withShortName("c").create();
 
     Option kOpt = obuilder.withLongName("k").withRequired(false).withArgument(
-            abuilder.withName("k").withMinimum(1).withMaximum(1).create()).
-            withDescription("The k in k-Means.  If specified, then a random selection of k Vectors will be chosen as the Centroid and written to the clusters output path.").withShortName("k").create();
+        abuilder.withName("k").withMinimum(1).withMaximum(1).create()).
+        withDescription("The k in k-Means.  If specified, then a random selection of k Vectors will be chosen as the Centroid and written to the clusters output path.").withShortName("k").create();
 
     Option outputOpt = obuilder.withLongName("output").withRequired(true).withArgument(
-            abuilder.withName("output").withMinimum(1).withMaximum(1).create()).
-            withDescription("The Path to put the output in").withShortName("o").create();
+        abuilder.withName("output").withMinimum(1).withMaximum(1).create()).
+        withDescription("The Path to put the output in").withShortName("o").create();
 
 
     Option measureClassOpt = obuilder.withLongName("distance").withRequired(false).withArgument(
-            abuilder.withName("distance").withMinimum(1).withMaximum(1).create()).
-            withDescription("The Distance Measure to use.  Default is SquaredEuclidean").withShortName("dm").create();
+        abuilder.withName("distance").withMinimum(1).withMaximum(1).create()).
+        withDescription("The Distance Measure to use.  Default is SquaredEuclidean").withShortName("dm").create();
 
     Option convergenceDeltaOpt = obuilder.withLongName("convergence").withRequired(false).withArgument(
-            abuilder.withName("convergence").withMinimum(1).withMaximum(1).create()).
-            withDescription("The threshold below which the clusters are considered to be converged.  Default is 0.5").withShortName("d").create();
+        abuilder.withName("convergence").withMinimum(1).withMaximum(1).create()).
+        withDescription("The threshold below which the clusters are considered to be converged.  Default is 0.5").withShortName("d").create();
 
     Option maxIterationsOpt = obuilder.withLongName("max").withRequired(false).withArgument(
-            abuilder.withName("max").withMinimum(1).withMaximum(1).create()).
-            withDescription("The maximum number of iterations to perform.  Default is 20").withShortName("x").create();
+        abuilder.withName("max").withMinimum(1).withMaximum(1).create()).
+        withDescription("The maximum number of iterations to perform.  Default is 20").withShortName("x").create();
 
     Option vectorClassOpt = obuilder.withLongName("vectorClass").withRequired(false).withArgument(
-            abuilder.withName("vectorClass").withMinimum(1).withMaximum(1).create()).
-            withDescription("The Vector implementation class name.  Default is SparseVector.class").withShortName("v").create();
+        abuilder.withName("vectorClass").withMinimum(1).withMaximum(1).create()).
+        withDescription("The Vector implementation class name.  Default is SparseVector.class").withShortName("v").create();
 
     Option helpOpt = obuilder.withLongName("help").
-            withDescription("Print out help").withShortName("h").create();
+        withDescription("Print out help").withShortName("h").create();
 
     Option overwriteOutput = obuilder.withLongName("overwrite").withRequired(false).
-            withDescription("If set, overwrite the output directory").withShortName("w").create();
+        withDescription("If set, overwrite the output directory").withShortName("w").create();
 
     Option clusteringOpt = obuilder.withLongName("clustering").withRequired(false).
-            withDescription("If true, run clustering only (assumes the iterations have already taken place").withShortName("l").create();
+        withDescription("If true, run clustering only (assumes the iterations have already taken place").withShortName("l").create();
 
     Option mOpt = obuilder.withLongName("m").withRequired(true).withArgument(
-            abuilder.withName("m").withMinimum(1).withMaximum(1).create()).
-            withDescription("coefficient normalization factor, must be greater than 1").withShortName("m").create();
+        abuilder.withName("m").withMinimum(1).withMaximum(1).create()).
+        withDescription("coefficient normalization factor, must be greater than 1").withShortName("m").create();
 
     Option numReduceTasksOpt = obuilder.withLongName("numReduce").withRequired(false).withArgument(
-            abuilder.withName("numReduce").withMinimum(1).withMaximum(1).create()).
-            withDescription("The number of reduce tasks").withShortName("r").create();
+        abuilder.withName("numReduce").withMinimum(1).withMaximum(1).create()).
+        withDescription("The number of reduce tasks").withShortName("r").create();
 
 
     Option numMapTasksOpt = obuilder.withLongName("numMap").withRequired(false).withArgument(
-            abuilder.withName("numMap").withMinimum(1).withMaximum(1).create()).
-            withDescription("The number of map tasks").withShortName("u").create();
+        abuilder.withName("numMap").withMinimum(1).withMaximum(1).create()).
+        withDescription("The number of map tasks").withShortName("u").create();
 
 
     Group group = gbuilder.withName("Options").withOption(inputOpt).withOption(clustersOpt).withOption(outputOpt).withOption(measureClassOpt)
-            .withOption(convergenceDeltaOpt).withOption(maxIterationsOpt).withOption(kOpt).withOption(mOpt)
-            .withOption(vectorClassOpt).withOption(overwriteOutput).withOption(helpOpt).create();
+        .withOption(convergenceDeltaOpt).withOption(maxIterationsOpt).withOption(kOpt).withOption(mOpt)
+        .withOption(vectorClassOpt).withOption(overwriteOutput).withOption(helpOpt).create();
 
     try {
       Parser parser = new Parser();
@@ -150,8 +150,8 @@
       float m = Float.parseFloat(cmdLine.getValue(mOpt).toString());
 
       Class<? extends Vector> vectorClass = cmdLine.hasOption(vectorClassOpt) == false ?
-              SparseVector.class
-              : (Class<? extends Vector>) Class.forName(cmdLine.getValue(vectorClassOpt).toString());
+          SparseVector.class
+          : (Class<? extends Vector>) Class.forName(cmdLine.getValue(vectorClassOpt).toString());
 
 
       int numReduceTasks = 10;
@@ -175,14 +175,14 @@
 
       if (cmdLine.hasOption(kOpt)) {
         clusters = RandomSeedGenerator.buildRandom(input, clusters,
-                Integer.parseInt(cmdLine.getValue(kOpt).toString())).toString();
+            Integer.parseInt(cmdLine.getValue(kOpt).toString())).toString();
       }
 
       if (cmdLine.hasOption(clusteringOpt)) {
         runClustering(input, clusters, output, measureClass, convergenceDelta, numMapTasks, m, vectorClass);
       } else {
         runJob(input, clusters, output, measureClass, convergenceDelta,
-                maxIterations, numMapTasks, numReduceTasks, m, vectorClass);
+            maxIterations, numMapTasks, numReduceTasks, m, vectorClass);
       }
 
 
@@ -206,7 +206,7 @@
    * @param numMapTasks      the number of mapper tasks
    * @param numReduceTasks   the number of reduce tasks
    * @param m                the fuzzification factor, see http://en.wikipedia.org/wiki/Data_clustering#Fuzzy_c-means_clustering
-   * @param vectorClass     the {@link org.apache.mahout.matrix.Vector} implementation to use
+   * @param vectorClass      the {@link org.apache.mahout.matrix.Vector} implementation to use
    */
   public static void runJob(String input, String clustersIn, String output,
                             String measureClass, double convergenceDelta, int maxIterations,
@@ -222,7 +222,7 @@
       // point the output to a new directory per iteration
       String clustersOut = output + File.separator + "clusters-" + iteration;
       converged = runIteration(input, clustersIn, clustersOut, measureClass,
-              convergenceDelta, numMapTasks, numReduceTasks, iteration, m);
+          convergenceDelta, numMapTasks, numReduceTasks, iteration, m);
 
       // now point the input to the old output directory
       clustersIn = output + File.separator + "clusters-" + iteration;
@@ -233,7 +233,7 @@
     log.info("Clustering ");
 
     runClustering(input, clustersIn, output + File.separator + "points",
-            measureClass, convergenceDelta, numMapTasks, m, vectorClass);
+        measureClass, convergenceDelta, numMapTasks, m, vectorClass);
   }
 
   /**
@@ -361,7 +361,7 @@
     };
 
     FileStatus[] matches = fs.listStatus(FileUtil.stat2Paths(fs.globStatus(
-            clusterPath, clusterFileFilter)), clusterFileFilter);
+        clusterPath, clusterFileFilter)), clusterFileFilter);
 
     for (FileStatus match : matches) {
       result.add(fs.makeQualified(match.getPath()));

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansInfo.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansInfo.java?rev=792856&r1=792855&r2=792856&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansInfo.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansInfo.java Fri Jul 10 09:35:19 2009
@@ -17,14 +17,14 @@
 
 package org.apache.mahout.clustering.fuzzykmeans;
 
-import java.io.DataInput;
-import java.io.DataOutput;
-import java.io.IOException;
-
 import org.apache.hadoop.io.Writable;
 import org.apache.mahout.matrix.AbstractVector;
 import org.apache.mahout.matrix.Vector;
 
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+
 public class FuzzyKMeansInfo implements Writable {
 
   private double probability;

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansJob.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansJob.java?rev=792856&r1=792855&r2=792856&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansJob.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansJob.java Fri Jul 10 09:35:19 2009
@@ -17,14 +17,14 @@
 
 package org.apache.mahout.clustering.fuzzykmeans;
 
-import java.io.IOException;
-
 import org.apache.mahout.clustering.canopy.CanopyDriver;
-import org.apache.mahout.utils.ManhattanDistanceMeasure;
 import org.apache.mahout.matrix.Vector;
+import org.apache.mahout.utils.ManhattanDistanceMeasure;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import java.io.IOException;
+
 public class FuzzyKMeansJob {
 
   private static final Logger log = LoggerFactory
@@ -57,9 +57,7 @@
         maxIterations, numMapTasks, numReduceTasks, doCanopy, m, vectorClass);
   }
 
-  /**
-   * Prints Error Message
-   */
+  /** Prints Error Message */
   private static void printMessage() {
     log
         .warn("Usage: inputDir clusterDir OutputDir measureClass ConvergenceDelata  maxIterations numMapTasks numReduceTasks doCanopy m");
@@ -67,20 +65,20 @@
 
   /**
    * Run the job using supplied arguments
-   * 
-   * @param input the directory pathname for input points
-   * @param clustersIn the directory pathname for initial clusters
-   * @param output the directory pathname for output points
-   * @param measureClass the classname of the DistanceMeasure
+   *
+   * @param input            the directory pathname for input points
+   * @param clustersIn       the directory pathname for initial clusters
+   * @param output           the directory pathname for output points
+   * @param measureClass     the classname of the DistanceMeasure
    * @param convergenceDelta the convergence delta value
-   * @param maxIterations the maximum number of iterations
-   * @param numMapTasks the number of maptasks
-   * @param doCanopy does canopy needed for initial clusters
-   * @param m param needed to fuzzify the cluster membership values
+   * @param maxIterations    the maximum number of iterations
+   * @param numMapTasks      the number of maptasks
+   * @param doCanopy         does canopy needed for initial clusters
+   * @param m                param needed to fuzzify the cluster membership values
    */
   public static void runJob(String input, String clustersIn, String output,
-      String measureClass, double convergenceDelta, int maxIterations,
-      int numMapTasks, int numReduceTasks, boolean doCanopy, float m, Class<? extends Vector> vectorClass)
+                            String measureClass, double convergenceDelta, int maxIterations,
+                            int numMapTasks, int numReduceTasks, boolean doCanopy, float m, Class<? extends Vector> vectorClass)
       throws IOException {
 
     // run canopy to find initial clusters

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansMapper.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansMapper.java?rev=792856&r1=792855&r2=792856&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansMapper.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansMapper.java Fri Jul 10 09:35:19 2009
@@ -17,10 +17,6 @@
 
 package org.apache.mahout.clustering.fuzzykmeans;
 
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.List;
-
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.io.WritableComparable;
 import org.apache.hadoop.mapred.JobConf;
@@ -32,6 +28,10 @@
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
 public class FuzzyKMeansMapper extends MapReduceBase implements
     Mapper<WritableComparable<?>, Vector, Text, FuzzyKMeansInfo> {
 
@@ -41,13 +41,13 @@
 
   @Override
   public void map(WritableComparable<?> key, Vector point,
-      OutputCollector<Text, FuzzyKMeansInfo> output, Reporter reporter) throws IOException {
+                  OutputCollector<Text, FuzzyKMeansInfo> output, Reporter reporter) throws IOException {
     SoftCluster.emitPointProbToCluster(point, clusters, output);
   }
 
   /**
    * Configure the mapper by providing its clusters. Used by unit tests.
-   * 
+   *
    * @param clusters a List<Cluster>
    */
   void config(List<SoftCluster> clusters) {
@@ -66,8 +66,9 @@
     FuzzyKMeansUtil.configureWithClusterInfo(job
         .get(SoftCluster.CLUSTER_PATH_KEY), clusters);
 
-    if (clusters.isEmpty())
+    if (clusters.isEmpty()) {
       throw new NullPointerException("Cluster is empty!!!");
+    }
   }
 
 }

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansReducer.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansReducer.java?rev=792856&r1=792855&r2=792856&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansReducer.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansReducer.java Fri Jul 10 09:35:19 2009
@@ -17,13 +17,6 @@
 
 package org.apache.mahout.clustering.fuzzykmeans;
 
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Map;
-
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.mapred.JobConf;
 import org.apache.hadoop.mapred.MapReduceBase;
@@ -31,6 +24,13 @@
 import org.apache.hadoop.mapred.Reducer;
 import org.apache.hadoop.mapred.Reporter;
 
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+
 public class FuzzyKMeansReducer extends MapReduceBase implements
     Reducer<Text, FuzzyKMeansInfo, Text, SoftCluster> {
 
@@ -38,7 +38,7 @@
 
   @Override
   public void reduce(Text key, Iterator<FuzzyKMeansInfo> values,
-      OutputCollector<Text, SoftCluster> output, Reporter reporter) throws IOException {
+                     OutputCollector<Text, SoftCluster> output, Reporter reporter) throws IOException {
 
     SoftCluster cluster = clusterMap.get(key.toString());
 
@@ -55,7 +55,7 @@
     }
     // force convergence calculation
     cluster.computeConvergence();
-    output.collect(new Text(cluster.getIdentifier()), cluster); 
+    output.collect(new Text(cluster.getIdentifier()), cluster);
   }
 
   @Override
@@ -70,8 +70,9 @@
         .get(SoftCluster.CLUSTER_PATH_KEY), clusters);
     setClusterMap(clusters);
 
-    if (clusterMap.isEmpty())
+    if (clusterMap.isEmpty()) {
       throw new NullPointerException("Cluster is empty!!!");
+    }
   }
 
   private void setClusterMap(List<SoftCluster> clusters) {

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansUtil.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansUtil.java?rev=792856&r1=792855&r2=792856&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansUtil.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansUtil.java Fri Jul 10 09:35:19 2009
@@ -17,6 +17,7 @@
  */
 
 
+import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.FileUtil;
@@ -24,7 +25,6 @@
 import org.apache.hadoop.fs.PathFilter;
 import org.apache.hadoop.io.SequenceFile;
 import org.apache.hadoop.io.Writable;
-import org.apache.hadoop.conf.Configuration;
 import org.apache.mahout.clustering.kmeans.Cluster;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -39,12 +39,7 @@
   private FuzzyKMeansUtil() {
   }
 
-  /**
-   * Configure the mapper with the cluster info
-   *
-   * @param clusterPathStr
-   * @param clusters
-   */
+  /** Configure the mapper with the cluster info */
   public static void configureWithClusterInfo(String clusterPathStr, List<SoftCluster> clusters) {
     //Get the path location where the cluster Info is stored
     Configuration job = new Configuration();
@@ -63,7 +58,7 @@
       //get all filtered file names in result list
       FileSystem fs = clusterPath.getFileSystem(job);
       FileStatus[] matches = fs.listStatus(FileUtil.stat2Paths(fs.globStatus(
-              clusterPath, clusterFileFilter)), clusterFileFilter);
+          clusterPath, clusterFileFilter)), clusterFileFilter);
 
       for (FileStatus match : matches) {
         result.add(fs.makeQualified(match.getPath()));
@@ -86,7 +81,7 @@
             log.error("Exception", e);
             throw new RuntimeException(e);
           }
-          if (valueClass.equals(Cluster.class)){
+          if (valueClass.equals(Cluster.class)) {
             Cluster value = new Cluster();
             while (reader.next(key, value)) {
               // get the cluster info
@@ -94,7 +89,7 @@
               clusters.add(theCluster);
               value = new Cluster();
             }
-          } else if (valueClass.equals(SoftCluster.class)){
+          } else if (valueClass.equals(SoftCluster.class)) {
             SoftCluster value = new SoftCluster();
             while (reader.next(key, value)) {
               // get the cluster info

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/SoftCluster.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/SoftCluster.java?rev=792856&r1=792855&r2=792856&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/SoftCluster.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/SoftCluster.java Fri Jul 10 09:35:19 2009
@@ -17,12 +17,6 @@
 
 package org.apache.mahout.clustering.fuzzykmeans;
 
-import java.io.DataInput;
-import java.io.DataOutput;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.List;
-
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.io.Writable;
 import org.apache.hadoop.mapred.JobConf;
@@ -33,6 +27,12 @@
 import org.apache.mahout.matrix.Vector;
 import org.apache.mahout.utils.DistanceMeasure;
 
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
 public class SoftCluster implements Writable {
 
   public static final String DISTANCE_MEASURE_KEY = "org.apache.mahout.clustering.kmeans.measure";
@@ -88,7 +88,7 @@
 
   /**
    * Format the SoftCluster for output
-   * 
+   *
    * @param cluster the Cluster
    */
   public static String formatCluster(SoftCluster cluster) {
@@ -98,7 +98,7 @@
 
   /**
    * Decodes and returns a SoftCluster from the formattedString
-   * 
+   *
    * @param formattedString a String produced by formatCluster
    */
   public static SoftCluster decodeCluster(String formattedString) {
@@ -138,7 +138,7 @@
 
   /**
    * Configure the distance measure from the job
-   * 
+   *
    * @param job the JobConf for the job
    */
   public static void configure(JobConf job) {
@@ -161,8 +161,8 @@
 
   /**
    * Configure the distance measure directly. Used by unit tests.
-   * 
-   * @param aMeasure the DistanceMeasure
+   *
+   * @param aMeasure          the DistanceMeasure
    * @param aConvergenceDelta the delta value used to define convergence
    */
   public static void config(DistanceMeasure aMeasure, double aConvergenceDelta) {
@@ -173,15 +173,14 @@
 
   /**
    * Emit the point and its probability of belongingness to each cluster
-   * 
-   * @param point a point
+   *
+   * @param point    a point
    * @param clusters a List<SoftCluster>
-   * @param output the OutputCollector to emit into
-   * @throws IOException
+   * @param output   the OutputCollector to emit into
    */
   public static void emitPointProbToCluster(Vector point,
-      List<SoftCluster> clusters,
-      OutputCollector<Text, FuzzyKMeansInfo> output) throws IOException {
+                                            List<SoftCluster> clusters,
+                                            OutputCollector<Text, FuzzyKMeansInfo> output) throws IOException {
     List<Double> clusterDistanceList = new ArrayList<Double>();
     for (SoftCluster cluster : clusters) {
       clusterDistanceList.add(measure.distance(cluster.getCenter(), point));
@@ -203,15 +202,14 @@
 
   /**
    * Output point with cluster info (Cluster and probability)
-   * 
-   * @param point a point
+   *
+   * @param point    a point
    * @param clusters a List<SoftCluster> to test
-   * @param output the OutputCollector to emit into
-   * @throws IOException
+   * @param output   the OutputCollector to emit into
    */
   public static void outputPointWithClusterProbabilities(String key,
-      Vector point, List<SoftCluster> clusters,
-      OutputCollector<Text, FuzzyKMeansOutput> output) throws IOException {
+                                                         Vector point, List<SoftCluster> clusters,
+                                                         OutputCollector<Text, FuzzyKMeansOutput> output) throws IOException {
     List<Double> clusterDistanceList = new ArrayList<Double>();
 
     for (SoftCluster cluster : clusters) {
@@ -229,25 +227,21 @@
     }
     String name = point.getName();
     output.collect(new Text(name != null && name.length() != 0 ? name
-            : point.asFormatString()),
-            fOutput);
+        : point.asFormatString()),
+        fOutput);
   }
 
-  /**
-   * Computes the probability of a point belonging to a cluster
-   * 
-   * @param clusterDistance
-   * @param clusterDistanceList
-   */
+  /** Computes the probability of a point belonging to a cluster */
   public static double computeProbWeight(double clusterDistance,
-      List<Double> clusterDistanceList) {
+                                         List<Double> clusterDistanceList) {
     if (clusterDistance == 0) {
       clusterDistance = MINIMAL_VALUE;
     }
     double denom = 0.0;
     for (double eachCDist : clusterDistanceList) {
-      if (eachCDist == 0.0)
+      if (eachCDist == 0.0) {
         eachCDist = MINIMAL_VALUE;
+      }
 
       denom += Math.pow(clusterDistance / eachCDist, 2.0 / (m - 1));
 
@@ -257,13 +251,13 @@
 
   /**
    * Compute the centroid
-   * 
+   *
    * @return the new centroid
    */
   private Vector computeCentroid() {
-    if (pointProbSum == 0)
+    if (pointProbSum == 0) {
       return weightedPointTotal;
-    else if (centroid == null) {
+    } else if (centroid == null) {
       // lazy compute new centroid
       centroid = weightedPointTotal.divide(pointProbSum);
     }
@@ -276,7 +270,7 @@
 
   /**
    * Construct a new SoftCluster with the given point as its center
-   * 
+   *
    * @param center the center point
    */
   public SoftCluster(Vector center) {
@@ -289,7 +283,7 @@
 
   /**
    * Construct a new SoftCluster with the given point as its center
-   * 
+   *
    * @param center the center point
    */
   public SoftCluster(Vector center, int clusterId) {
@@ -299,11 +293,7 @@
     this.weightedPointTotal = center.like();
   }
 
-  /**
-   * Construct a new softcluster with the given clusterID
-   * 
-   * @param clusterId
-   */
+  /** Construct a new softcluster with the given clusterID */
   public SoftCluster(String clusterId) {
 
     this.clusterId = Integer.parseInt((clusterId.substring(1)));
@@ -318,72 +308,70 @@
   }
 
   public String getIdentifier() {
-    if (converged)
+    if (converged) {
       return "V" + clusterId;
-    else
+    } else {
       return "C" + clusterId;
+    }
   }
 
-  /**
-   * Observe the point, accumulating weighted variables for std() calculation
-   * @param point
-   * @param ptProb
-   */
+  /** Observe the point, accumulating weighted variables for std() calculation */
   private void observePoint(Vector point, double ptProb) {
     s0 += ptProb;
     Vector wtPt = point.times(ptProb);
-    if (s1 == null)
+    if (s1 == null) {
       s1 = point.clone();
-    else
+    } else {
       s1 = s1.plus(wtPt);
-    if (s2 == null)
+    }
+    if (s2 == null) {
       s2 = wtPt.times(wtPt);
-    else
+    } else {
       s2 = s2.plus(wtPt.times(wtPt));
+    }
   }
 
-  /**
-   * Compute a "standard deviation" value to use as the "radius" of the cluster for display purposes
-   * @return
-   */
+  /** Compute a "standard deviation" value to use as the "radius" of the cluster for display purposes */
   public double std() {
     if (s0 > 0) {
       Vector radical = s2.times(s0).minus(s1.times(s1));
       radical = radical.times(radical).assign(new SquareRootFunction());
       Vector stds = radical.assign(new SquareRootFunction()).divide(s0);
       return stds.zSum() / stds.size();
-    } else
+    } else {
       return 0;
+    }
   }
 
   /**
    * Add the point to the SoftCluster
-   * 
+   *
    * @param point a point to add
-   * @param ptProb
    */
   public void addPoint(Vector point, double ptProb) {
     observePoint(point, ptProb);
     centroid = null;
     pointProbSum += ptProb;
-    if (weightedPointTotal == null)
+    if (weightedPointTotal == null) {
       weightedPointTotal = point.clone().times(ptProb);
-    else
+    } else {
       weightedPointTotal = weightedPointTotal.plus(point.times(ptProb));
+    }
   }
 
   /**
    * Add the point to the cluster
-   * 
+   *
    * @param delta a point to add
    */
   public void addPoints(Vector delta, double partialSumPtProb) {
     centroid = null;
     pointProbSum += partialSumPtProb;
-    if (weightedPointTotal == null)
+    if (weightedPointTotal == null) {
       weightedPointTotal = delta.clone();
-    else
+    } else {
       weightedPointTotal = weightedPointTotal.plus(delta);
+    }
   }
 
   public Vector getCenter() {
@@ -394,9 +382,7 @@
     return pointProbSum;
   }
 
-  /**
-   * Compute the centroid and set the center to it.
-   */
+  /** Compute the centroid and set the center to it. */
   public void recomputeCenter() {
     center = computeCentroid();
     pointProbSum = 0;
@@ -405,7 +391,7 @@
 
   /**
    * Return if the cluster is converged by comparing its center and centroid.
-   * 
+   *
    * @return if the cluster is converged
    */
   public boolean computeConvergence() {

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/Cluster.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/Cluster.java?rev=792856&r1=792855&r2=792856&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/Cluster.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/Cluster.java Fri Jul 10 09:35:19 2009
@@ -16,11 +16,6 @@
  */
 package org.apache.mahout.clustering.kmeans;
 
-import java.io.DataInput;
-import java.io.DataOutput;
-import java.io.IOException;
-import java.util.List;
-
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.io.Writable;
 import org.apache.hadoop.mapred.JobConf;
@@ -31,6 +26,11 @@
 import org.apache.mahout.matrix.Vector;
 import org.apache.mahout.utils.DistanceMeasure;
 
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+import java.util.List;
+
 public class Cluster extends ClusterBase implements Writable {
 
   private static final String ERROR_UNKNOWN_CLUSTER_FORMAT = "Unknown cluster format:\n";
@@ -41,13 +41,9 @@
 
   public static final String CLUSTER_CONVERGENCE_KEY = "org.apache.mahout.clustering.kmeans.convergence";
 
-  /**
-   * The number of iterations that have taken place
-   */
+  /** The number of iterations that have taken place */
   public static final String ITERATION_NUMBER = "org.apache.mahout.clustering.kmeans.iteration";
-  /**
-   * Boolean value indicating whether the initial input is from Canopy clustering
-   */
+  /** Boolean value indicating whether the initial input is from Canopy clustering */
   public static final String CANOPY_INPUT = "org.apache.mahout.clustering.kmeans.canopyInput";
 
   private static int nextClusterId = 0;
@@ -60,7 +56,6 @@
   private double std;
 
 
-
   // the total of all the points squared, used for std computation
   private Vector pointSquaredTotal = null;
 
@@ -71,7 +66,7 @@
 
   /**
    * Format the cluster for output
-   * 
+   *
    * @param cluster the Cluster
    * @return the String representation of the Cluster
    */
@@ -87,7 +82,7 @@
 
   /**
    * Decodes and returns a Cluster from the formattedString
-   * 
+   *
    * @param formattedString a String produced by formatCluster
    * @return a decoded Cluster, not null
    * @throws IllegalArgumentException when the string is wrongly formatted
@@ -105,9 +100,10 @@
       Cluster cluster = new Cluster(clusterCenter, clusterId);
       cluster.converged = startsWithV;
       return cluster;
-    } else
+    } else {
       throw new IllegalArgumentException(ERROR_UNKNOWN_CLUSTER_FORMAT
           + formattedString);
+    }
   }
 
 
@@ -130,7 +126,7 @@
 
   /**
    * Configure the distance measure from the job
-   * 
+   *
    * @param job the JobConf for the job
    */
   public static void configure(JobConf job) {
@@ -152,8 +148,8 @@
 
   /**
    * Configure the distance measure directly. Used by unit tests.
-   * 
-   * @param aMeasure the DistanceMeasure
+   *
+   * @param aMeasure          the DistanceMeasure
    * @param aConvergenceDelta the delta value used to define convergence
    */
   public static void config(DistanceMeasure aMeasure, double aConvergenceDelta) {
@@ -164,14 +160,13 @@
 
   /**
    * Emit the point to the nearest cluster center
-   * 
-   * @param point a point
+   *
+   * @param point    a point
    * @param clusters a List<Cluster> to test
-   * @param output the OutputCollector to emit into
-   * @throws IOException
+   * @param output   the OutputCollector to emit into
    */
   public static void emitPointToNearestCluster(Vector point,
-      List<Cluster> clusters, OutputCollector<Text, KMeansInfo> output)
+                                               List<Cluster> clusters, OutputCollector<Text, KMeansInfo> output)
       throws IOException {
     Cluster nearestCluster = null;
     double nearestDistance = Double.MAX_VALUE;
@@ -187,7 +182,7 @@
   }
 
   public static void outputPointWithClusterInfo(Vector point,
-      List<Cluster> clusters, OutputCollector<Text, Text> output)
+                                                List<Cluster> clusters, OutputCollector<Text, Text> output)
       throws IOException {
     Cluster nearestCluster = null;
     double nearestDistance = Double.MAX_VALUE;
@@ -205,13 +200,13 @@
 
   /**
    * Compute the centroid by averaging the pointTotals
-   * 
+   *
    * @return the new centroid
    */
   private Vector computeCentroid() {
-    if (numPoints == 0)
+    if (numPoints == 0) {
       return center;
-    else if (centroid == null) {
+    } else if (centroid == null) {
       // lazy compute new centroid
       centroid = pointTotal.divide(numPoints);
       Vector stds = pointSquaredTotal.times(numPoints).minus(
@@ -224,9 +219,8 @@
 
   /**
    * Construct a new cluster with the given point as its center
-   * 
-   * @param center
-   *            the center point
+   *
+   * @param center the center point
    */
   public Cluster(Vector center) {
     super();
@@ -237,15 +231,13 @@
     this.pointSquaredTotal = center.like();
   }
 
-  /**
-   * For (de)serialization as a Writable
-   */
+  /** For (de)serialization as a Writable */
   public Cluster() {
   }
 
   /**
    * Construct a new cluster with the given point as its center
-   * 
+   *
    * @param center the center point
    */
   public Cluster(Vector center, int clusterId) {
@@ -257,9 +249,7 @@
     this.pointSquaredTotal = center.like();
   }
 
-  /**
-   * Construct a new clsuter with the given id as identifier
-   */
+  /** Construct a new cluster with the given id as identifier */
   public Cluster(String clusterId) {
 
     this.id = Integer.parseInt((clusterId.substring(1)));
@@ -273,15 +263,16 @@
   }
 
   public String getIdentifier() {
-    if (converged)
+    if (converged) {
       return "V" + id;
-    else
+    } else {
       return "C" + id;
+    }
   }
 
   /**
    * Add the point to the cluster
-   * 
+   *
    * @param point a point to add
    */
   public void addPoint(Vector point) {
@@ -290,7 +281,7 @@
 
   /**
    * Add the point to the cluster
-   * 
+   *
    * @param count the number of points in the delta
    * @param delta a point to add
    */
@@ -306,12 +297,8 @@
     }
   }
 
-  
-
 
-  /**
-   * Compute the centroid and set the center to it.
-   */
+  /** Compute the centroid and set the center to it. */
   public void recomputeCenter() {
     center = computeCentroid();
     numPoints = 0;
@@ -320,7 +307,7 @@
 
   /**
    * Return if the cluster is converged by comparing its center and centroid.
-   * 
+   *
    * @return if the cluster is converged
    */
   public boolean computeConvergence() {
@@ -330,14 +317,11 @@
   }
 
 
-
   public boolean isConverged() {
     return converged;
   }
 
-  /**
-   * @return the std
-   */
+  /** @return the std */
   public double getStd() {
     return std;
   }

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansClusterMapper.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansClusterMapper.java?rev=792856&r1=792855&r2=792856&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansClusterMapper.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansClusterMapper.java Fri Jul 10 09:35:19 2009
@@ -19,18 +19,18 @@
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.io.WritableComparable;
 import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.MapReduceBase;
+import org.apache.hadoop.mapred.Mapper;
 import org.apache.hadoop.mapred.OutputCollector;
 import org.apache.hadoop.mapred.Reporter;
-import org.apache.hadoop.mapred.Mapper;
-import org.apache.hadoop.mapred.MapReduceBase;
 import org.apache.mahout.matrix.Vector;
 
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.List;
 
-public class KMeansClusterMapper extends MapReduceBase  implements
-        Mapper<WritableComparable<?>, Vector, Text, Text> {
+public class KMeansClusterMapper extends MapReduceBase implements
+    Mapper<WritableComparable<?>, Vector, Text, Text> {
   protected List<Cluster> clusters;
 
 
@@ -56,10 +56,11 @@
     clusters = new ArrayList<Cluster>();
 
     KMeansUtil.configureWithClusterInfo(job.get(Cluster.CLUSTER_PATH_KEY),
-            clusters);
+        clusters);
 
-    if (clusters.isEmpty())
+    if (clusters.isEmpty()) {
       throw new NullPointerException("Cluster is empty!!!");
+    }
   }
 
 }

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansCombiner.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansCombiner.java?rev=792856&r1=792855&r2=792856&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansCombiner.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansCombiner.java Fri Jul 10 09:35:19 2009
@@ -16,9 +16,6 @@
  */
 package org.apache.mahout.clustering.kmeans;
 
-import java.io.IOException;
-import java.util.Iterator;
-
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.mapred.JobConf;
 import org.apache.hadoop.mapred.MapReduceBase;
@@ -26,19 +23,22 @@
 import org.apache.hadoop.mapred.Reducer;
 import org.apache.hadoop.mapred.Reporter;
 
+import java.io.IOException;
+import java.util.Iterator;
+
 public class KMeansCombiner extends MapReduceBase implements
     Reducer<Text, KMeansInfo, Text, KMeansInfo> {
 
   @Override
   public void reduce(Text key, Iterator<KMeansInfo> values,
-      OutputCollector<Text, KMeansInfo> output, Reporter reporter) throws IOException {
+                     OutputCollector<Text, KMeansInfo> output, Reporter reporter) throws IOException {
     Cluster cluster = new Cluster(key.toString());
     while (values.hasNext()) {
       KMeansInfo next = values.next();
       cluster.addPoints(next.getPoints(),
           next.getPointTotal());
     }
-    output.collect(key, new KMeansInfo(cluster.getNumPoints(), cluster.getPointTotal())); 
+    output.collect(key, new KMeansInfo(cluster.getNumPoints(), cluster.getPointTotal()));
   }
 
   @Override

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansDriver.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansDriver.java?rev=792856&r1=792855&r2=792856&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansDriver.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansDriver.java Fri Jul 10 09:35:19 2009
@@ -47,9 +47,7 @@
 
 public class KMeansDriver {
 
-  /**
-   * The name of the directory used to output final results.
-   */
+  /** The name of the directory used to output final results. */
   public static final String DEFAULT_OUTPUT_DIRECTORY = "/points";
 
   private static final Logger log = LoggerFactory.getLogger(KMeansDriver.class);
@@ -57,9 +55,7 @@
   private KMeansDriver() {
   }
 
-  /**
-   * @param args Expects 7 args and they all correspond to the order of the params in {@link #runJob}
-   */
+  /** @param args Expects 7 args and they all correspond to the order of the params in {@link #runJob} */
   public static void main(String[] args) throws ClassNotFoundException, IOException, IllegalAccessException, InstantiationException {
 
     DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
@@ -67,51 +63,51 @@
     GroupBuilder gbuilder = new GroupBuilder();
 
     Option inputOpt = obuilder.withLongName("input").withRequired(true).withArgument(
-            abuilder.withName("input").withMinimum(1).withMaximum(1).create()).
-            withDescription("The Path for input Vectors. Must be a SequenceFile of Writable, Vector").withShortName("i").create();
+        abuilder.withName("input").withMinimum(1).withMaximum(1).create()).
+        withDescription("The Path for input Vectors. Must be a SequenceFile of Writable, Vector").withShortName("i").create();
 
     Option clustersOpt = obuilder.withLongName("clusters").withRequired(true).withArgument(
-            abuilder.withName("clusters").withMinimum(1).withMaximum(1).create()).
-            withDescription("The input centroids, as Vectors.  Must be a SequenceFile of Writable, Cluster/Canopy.  " +
-                    "If k is also specified, then a random set of vectors will be selected and written out to this path first").withShortName("c").create();
+        abuilder.withName("clusters").withMinimum(1).withMaximum(1).create()).
+        withDescription("The input centroids, as Vectors.  Must be a SequenceFile of Writable, Cluster/Canopy.  " +
+            "If k is also specified, then a random set of vectors will be selected and written out to this path first").withShortName("c").create();
 
     Option kOpt = obuilder.withLongName("k").withRequired(false).withArgument(
-            abuilder.withName("k").withMinimum(1).withMaximum(1).create()).
-            withDescription("The k in k-Means.  If specified, then a random selection of k Vectors will be chosen as the Centroid and written to the clusters output path.").withShortName("k").create();
+        abuilder.withName("k").withMinimum(1).withMaximum(1).create()).
+        withDescription("The k in k-Means.  If specified, then a random selection of k Vectors will be chosen as the Centroid and written to the clusters output path.").withShortName("k").create();
 
     Option outputOpt = obuilder.withLongName("output").withRequired(true).withArgument(
-            abuilder.withName("output").withMinimum(1).withMaximum(1).create()).
-            withDescription("The Path to put the output in").withShortName("o").create();
+        abuilder.withName("output").withMinimum(1).withMaximum(1).create()).
+        withDescription("The Path to put the output in").withShortName("o").create();
 
     Option overwriteOutput = obuilder.withLongName("overwrite").withRequired(false).
-            withDescription("If set, overwrite the output directory").withShortName("w").create();
+        withDescription("If set, overwrite the output directory").withShortName("w").create();
 
     Option measureClassOpt = obuilder.withLongName("distance").withRequired(false).withArgument(
-            abuilder.withName("distance").withMinimum(1).withMaximum(1).create()).
-            withDescription("The Distance Measure to use.  Default is SquaredEuclidean").withShortName("m").create();
+        abuilder.withName("distance").withMinimum(1).withMaximum(1).create()).
+        withDescription("The Distance Measure to use.  Default is SquaredEuclidean").withShortName("m").create();
 
     Option convergenceDeltaOpt = obuilder.withLongName("convergence").withRequired(false).withArgument(
-            abuilder.withName("convergence").withMinimum(1).withMaximum(1).create()).
-            withDescription("The threshold below which the clusters are considered to be converged.  Default is 0.5").withShortName("d").create();
+        abuilder.withName("convergence").withMinimum(1).withMaximum(1).create()).
+        withDescription("The threshold below which the clusters are considered to be converged.  Default is 0.5").withShortName("d").create();
 
     Option maxIterationsOpt = obuilder.withLongName("max").withRequired(false).withArgument(
-            abuilder.withName("max").withMinimum(1).withMaximum(1).create()).
-            withDescription("The maximum number of iterations to perform.  Default is 20").withShortName("x").create();
+        abuilder.withName("max").withMinimum(1).withMaximum(1).create()).
+        withDescription("The maximum number of iterations to perform.  Default is 20").withShortName("x").create();
 
     Option vectorClassOpt = obuilder.withLongName("vectorClass").withRequired(false).withArgument(
-            abuilder.withName("vectorClass").withMinimum(1).withMaximum(1).create()).
-            withDescription("The Vector implementation class name.  Default is SparseVector.class").withShortName("v").create();
+        abuilder.withName("vectorClass").withMinimum(1).withMaximum(1).create()).
+        withDescription("The Vector implementation class name.  Default is SparseVector.class").withShortName("v").create();
 
     Option numReduceTasksOpt = obuilder.withLongName("numReduce").withRequired(false).withArgument(
-            abuilder.withName("numReduce").withMinimum(1).withMaximum(1).create()).
-            withDescription("The number of reduce tasks").withShortName("r").create();
+        abuilder.withName("numReduce").withMinimum(1).withMaximum(1).create()).
+        withDescription("The number of reduce tasks").withShortName("r").create();
 
     Option helpOpt = obuilder.withLongName("help").
-            withDescription("Print out help").withShortName("h").create();
+        withDescription("Print out help").withShortName("h").create();
 
     Group group = gbuilder.withName("Options").withOption(inputOpt).withOption(clustersOpt).withOption(outputOpt).withOption(measureClassOpt)
-            .withOption(convergenceDeltaOpt).withOption(maxIterationsOpt).withOption(numReduceTasksOpt).withOption(kOpt)
-            .withOption(vectorClassOpt).withOption(overwriteOutput).withOption(helpOpt).create();
+        .withOption(convergenceDeltaOpt).withOption(maxIterationsOpt).withOption(numReduceTasksOpt).withOption(kOpt)
+        .withOption(vectorClassOpt).withOption(overwriteOutput).withOption(helpOpt).create();
     try {
       Parser parser = new Parser();
       parser.setGroup(group);
@@ -134,8 +130,8 @@
       }
 
       Class<? extends Vector> vectorClass = cmdLine.hasOption(vectorClassOpt) == false ?
-              SparseVector.class
-              : (Class<? extends Vector>) Class.forName(cmdLine.getValue(vectorClassOpt).toString());
+          SparseVector.class
+          : (Class<? extends Vector>) Class.forName(cmdLine.getValue(vectorClassOpt).toString());
 
 
       int maxIterations = 20;
@@ -153,7 +149,7 @@
         clusters = RandomSeedGenerator.buildRandom(input, clusters, Integer.parseInt(cmdLine.getValue(kOpt).toString())).toString();
       }
       runJob(input, clusters, output, measureClass, convergenceDelta,
-              maxIterations, numReduceTasks, vectorClass);
+          maxIterations, numReduceTasks, vectorClass);
     } catch (OptionException e) {
       log.error("Exception", e);
       CommandLineUtil.printHelp(group);
@@ -161,7 +157,6 @@
   }
 
 
-
   /**
    * Run the job using supplied arguments
    *
@@ -172,7 +167,6 @@
    * @param convergenceDelta the convergence delta value
    * @param maxIterations    the maximum number of iterations
    * @param numReduceTasks   the number of reducers
-   * @param vectorClass
    */
   public static void runJob(String input, String clustersIn, String output,
                             String measureClass, double convergenceDelta, int maxIterations,
@@ -190,7 +184,7 @@
       // point the output to a new directory per iteration
       String clustersOut = output + "/clusters-" + iteration;
       converged = runIteration(input, clustersIn, clustersOut, measureClass,
-              delta, numReduceTasks, iteration);
+          delta, numReduceTasks, iteration);
       // now point the input to the old output directory
       clustersIn = output + "/clusters-" + iteration;
       iteration++;
@@ -299,7 +293,7 @@
    * @throws IOException if there was an IO error
    */
   private static boolean isConverged(String filePath, JobConf conf, FileSystem fs)
-          throws IOException {
+      throws IOException {
     Path outPart = new Path(filePath + "/*");
     SequenceFile.Reader reader = new SequenceFile.Reader(fs, outPart, conf);
     Writable key;

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansInfo.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansInfo.java?rev=792856&r1=792855&r2=792856&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansInfo.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansInfo.java Fri Jul 10 09:35:19 2009
@@ -21,9 +21,9 @@
 import org.apache.mahout.matrix.AbstractVector;
 import org.apache.mahout.matrix.Vector;
 
+import java.io.DataInput;
 import java.io.DataOutput;
 import java.io.IOException;
-import java.io.DataInput;
 
 public class KMeansInfo implements Writable {
 

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansMapper.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansMapper.java?rev=792856&r1=792855&r2=792856&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansMapper.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansMapper.java Fri Jul 10 09:35:19 2009
@@ -16,10 +16,6 @@
  */
 package org.apache.mahout.clustering.kmeans;
 
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.List;
-
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.io.WritableComparable;
 import org.apache.hadoop.mapred.JobConf;
@@ -29,6 +25,10 @@
 import org.apache.hadoop.mapred.Reporter;
 import org.apache.mahout.matrix.Vector;
 
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
 public class KMeansMapper extends MapReduceBase implements
     Mapper<WritableComparable<?>, Vector, Text, KMeansInfo> {
 
@@ -36,13 +36,13 @@
 
   @Override
   public void map(WritableComparable<?> key, Vector point,
-      OutputCollector<Text, KMeansInfo> output, Reporter reporter) throws IOException {
-    Cluster.emitPointToNearestCluster(point, clusters,  output);
+                  OutputCollector<Text, KMeansInfo> output, Reporter reporter) throws IOException {
+    Cluster.emitPointToNearestCluster(point, clusters, output);
   }
 
   /**
    * Configure the mapper by providing its clusters. Used by unit tests.
-   * 
+   *
    * @param clusters a List<Cluster>
    */
   void config(List<Cluster> clusters) {
@@ -58,7 +58,8 @@
     KMeansUtil.configureWithClusterInfo(job.get(Cluster.CLUSTER_PATH_KEY),
         clusters);
 
-    if (clusters.isEmpty())
+    if (clusters.isEmpty()) {
       throw new NullPointerException("Cluster is empty!!!");
+    }
   }
 }

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansReducer.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansReducer.java?rev=792856&r1=792855&r2=792856&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansReducer.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansReducer.java Fri Jul 10 09:35:19 2009
@@ -16,13 +16,6 @@
  */
 package org.apache.mahout.clustering.kmeans;
 
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Map;
-
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.mapred.JobConf;
 import org.apache.hadoop.mapred.MapReduceBase;
@@ -30,6 +23,13 @@
 import org.apache.hadoop.mapred.Reducer;
 import org.apache.hadoop.mapred.Reporter;
 
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+
 public class KMeansReducer extends MapReduceBase implements
     Reducer<Text, KMeansInfo, Text, Cluster> {
 
@@ -37,7 +37,7 @@
 
   @Override
   public void reduce(Text key, Iterator<KMeansInfo> values,
-      OutputCollector<Text, Cluster> output, Reporter reporter) throws IOException {
+                     OutputCollector<Text, Cluster> output, Reporter reporter) throws IOException {
     Cluster cluster = clusterMap.get(key.toString());
 
     while (values.hasNext()) {
@@ -61,8 +61,9 @@
         clusters);
     setClusterMap(clusters);
 
-    if (clusterMap.isEmpty())
+    if (clusterMap.isEmpty()) {
       throw new NullPointerException("Cluster is empty!!!");
+    }
   }
 
   private void setClusterMap(List<Cluster> clusters) {

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansUtil.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansUtil.java?rev=792856&r1=792855&r2=792856&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansUtil.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansUtil.java Fri Jul 10 09:35:19 2009
@@ -16,10 +16,6 @@
  * limitations under the License.
  */
 
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.List;
-
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.FileUtil;
@@ -32,6 +28,10 @@
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
 final class KMeansUtil {
 
   private static final Logger log = LoggerFactory.getLogger(KMeansUtil.class);
@@ -39,12 +39,10 @@
   private KMeansUtil() {
   }
 
-  /**
-   * Configure the mapper with the cluster info
-   */
+  /** Configure the mapper with the cluster info */
   public static void configureWithClusterInfo(String clusterPathStr,
-      List<Cluster> clusters) {
-    
+                                              List<Cluster> clusters) {
+
     // Get the path location where the cluster Info is stored
     JobConf job = new JobConf(KMeansUtil.class);
     Path clusterPath = new Path(clusterPathStr + "/*");
@@ -72,7 +70,7 @@
       for (Path path : result) {
         SequenceFile.Reader reader = null;
         try {
-          reader =new SequenceFile.Reader(fs, path, job);
+          reader = new SequenceFile.Reader(fs, path, job);
           Class<?> valueClass = reader.getValueClass();
           Writable key;
           try {
@@ -84,14 +82,14 @@
             log.error("Exception", e);
             throw new RuntimeException(e);
           }
-          if (valueClass.equals(Cluster.class)){
+          if (valueClass.equals(Cluster.class)) {
             Cluster value = new Cluster();
             while (reader.next(key, value)) {
               // get the cluster info
               clusters.add(value);
               value = new Cluster();
             }
-          } else if (valueClass.equals(Canopy.class)){
+          } else if (valueClass.equals(Canopy.class)) {
             Canopy value = new Canopy();
             while (reader.next(key, value)) {
               // get the cluster info

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/RandomSeedGenerator.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/RandomSeedGenerator.java?rev=792856&r1=792855&r2=792856&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/RandomSeedGenerator.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/RandomSeedGenerator.java Fri Jul 10 09:35:19 2009
@@ -28,17 +28,15 @@
 import org.slf4j.LoggerFactory;
 
 import java.io.IOException;
-import java.util.Random;
-import java.util.List;
 import java.util.ArrayList;
+import java.util.List;
+import java.util.Random;
 
 
 /**
- * Given an Input Path containing a {@link org.apache.hadoop.io.SequenceFile}, randomly select k vectors
- * and write them to the output file as a {@link org.apache.mahout.clustering.kmeans.Cluster} representing
- * the initial centroid to use.
+ * Given an Input Path containing a {@link org.apache.hadoop.io.SequenceFile}, randomly select k vectors and write them
+ * to the output file as a {@link org.apache.mahout.clustering.kmeans.Cluster} representing the initial centroid to use.
  * <p/>
- *
  */
 public final class RandomSeedGenerator {
 
@@ -46,7 +44,8 @@
 
   public static final String K = "k";
 
-  private RandomSeedGenerator() {}
+  private RandomSeedGenerator() {
+  }
 
   public static Path buildRandom(String input, String output,
                                  int k) throws IOException, IllegalAccessException, InstantiationException {
@@ -59,12 +58,12 @@
     }
     fs.mkdirs(outPath);
     Path outFile = new Path(outPath, "part-randomSeed");
-    if (fs.exists(outFile) == true){
+    if (fs.exists(outFile) == true) {
       log.warn("Deleting " + outFile);
       fs.delete(outFile, false);
     }
     boolean newFile = fs.createNewFile(outFile);
-    if (newFile == true){
+    if (newFile == true) {
       SequenceFile.Reader reader = new SequenceFile.Reader(fs, new Path(input), conf);
       Writable key = (Writable) reader.getKeyClass().newInstance();
       Vector value = (Vector) reader.getValueClass().newInstance();
@@ -73,7 +72,7 @@
 
       List<Text> chosenTexts = new ArrayList<Text>(k);
       List<Cluster> chosenClusters = new ArrayList<Cluster>(k);
-      while (reader.next(key, value)){
+      while (reader.next(key, value)) {
         Cluster newCluster = new Cluster(value);
         newCluster.addPoint(value);
         Text newText = new Text(key.toString());