You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by sr...@apache.org on 2011/05/10 13:30:15 UTC

svn commit: r1101411 - in /mahout/trunk: core/src/main/java/org/apache/mahout/clustering/ core/src/main/java/org/apache/mahout/clustering/lda/ core/src/test/java/org/apache/mahout/clustering/ core/src/test/java/org/apache/mahout/math/hadoop/decomposer/...

Author: srowen
Date: Tue May 10 11:30:14 2011
New Revision: 1101411

URL: http://svn.apache.org/viewvc?rev=1101411&view=rev
Log:
Style re-changes for MAHOUT-683, MAHOUT-682

Modified:
    mahout/trunk/core/src/main/java/org/apache/mahout/clustering/ClusterIterator.java
    mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/LDADocumentTopicMapper.java
    mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/LDADriver.java
    mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/LDAWordTopicMapper.java
    mahout/trunk/core/src/test/java/org/apache/mahout/clustering/TestClusterClassifier.java
    mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/decomposer/TestDistributedLanczosSolverCLI.java
    mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayClustering.java
    mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayDirichlet.java
    mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayFuzzyKMeans.java
    mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayKMeans.java

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/ClusterIterator.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/ClusterIterator.java?rev=1101411&r1=1101410&r2=1101411&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/ClusterIterator.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/ClusterIterator.java Tue May 10 11:30:14 2011
@@ -18,7 +18,6 @@ package org.apache.mahout.clustering;
 
 import java.io.IOException;
 import java.util.Iterator;
-import java.util.List;
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
@@ -38,7 +37,6 @@ import org.apache.mahout.math.VectorWrit
  * with a set of models. To date, it has been tested with k-means and Dirichlet
  * clustering. See examples DisplayKMeans and DisplayDirichlet which have been
  * switched over to use it.
- * 
  */
 public class ClusterIterator {
   
@@ -53,15 +51,14 @@ public class ClusterIterator {
    * iterations
    * 
    * @param data
-   *          a List<Vector> of input vectors
+   *          a {@code List<Vector>} of input vectors
    * @param classifier
    *          a prior ClusterClassifier
    * @param numIterations
    *          the int number of iterations to perform
    * @return the posterior ClusterClassifier
    */
-  public ClusterClassifier iterate(List<Vector> data,
-      ClusterClassifier classifier, int numIterations) {
+  public ClusterClassifier iterate(Iterable<Vector> data, ClusterClassifier classifier, int numIterations) {
     for (int iteration = 1; iteration <= numIterations; iteration++) {
       for (Vector vector : data) {
         // classification yields probabilities
@@ -69,8 +66,7 @@ public class ClusterIterator {
         // policy selects weights for models given those probabilities
         Vector weights = policy.select(probabilities);
         // training causes all models to observe data
-        for (Iterator<Vector.Element> it = weights.iterateNonZero(); it
-            .hasNext();) {
+        for (Iterator<Vector.Element> it = weights.iterateNonZero(); it.hasNext();) {
           int index = it.next().index();
           classifier.train(index, vector, weights.get(index));
         }
@@ -97,8 +93,7 @@ public class ClusterIterator {
    *          the int number of iterations to perform
    * @throws IOException
    */
-  public void iterate(Path inPath, Path priorPath, Path outPath,
-      int numIterations) throws IOException {
+  public void iterate(Path inPath, Path priorPath, Path outPath, int numIterations) throws IOException {
     ClusterClassifier classifier = readClassifier(priorPath);
     Configuration conf = new Configuration();
     for (int iteration = 1; iteration <= numIterations; iteration++) {
@@ -126,8 +121,7 @@ public class ClusterIterator {
     }
   }
   
-  private void writeClassifier(ClusterClassifier classifier, Path outPath, String k)
-      throws IOException {
+  private static void writeClassifier(ClusterClassifier classifier, Path outPath, String k) throws IOException {
     Configuration config = new Configuration();
     FileSystem fs = FileSystem.get(outPath.toUri(), config);
     SequenceFile.Writer writer = new SequenceFile.Writer(fs, config, outPath,
@@ -137,7 +131,7 @@ public class ClusterIterator {
     writer.close();
   }
   
-  private ClusterClassifier readClassifier(Path inPath) throws IOException {
+  private static ClusterClassifier readClassifier(Path inPath) throws IOException {
     Configuration config = new Configuration();
     FileSystem fs = FileSystem.get(inPath.toUri(), config);
     SequenceFile.Reader reader = new SequenceFile.Reader(fs, inPath, config);

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/LDADocumentTopicMapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/LDADocumentTopicMapper.java?rev=1101411&r1=1101410&r2=1101411&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/LDADocumentTopicMapper.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/LDADocumentTopicMapper.java Tue May 10 11:30:14 2011
@@ -1,3 +1,20 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
 package org.apache.mahout.clustering.lda;
 
 import org.apache.hadoop.conf.Configuration;
@@ -8,9 +25,9 @@ import org.apache.mahout.math.VectorWrit
 
 import java.io.IOException;
 
-public class LDADocumentTopicMapper extends Mapper<WritableComparable<?>,VectorWritable,WritableComparable<?>,VectorWritable> {
+public class LDADocumentTopicMapper
+    extends Mapper<WritableComparable<?>,VectorWritable,WritableComparable<?>,VectorWritable> {
 
-  private LDAState state;
   private LDAInference infer;
 
   @Override
@@ -19,9 +36,8 @@ public class LDADocumentTopicMapper exte
                      Context context) throws IOException, InterruptedException {
 
     Vector wordCounts = wordCountsWritable.get();
-    LDAInference.InferredDocument doc;
     try {
-      doc = infer.infer(wordCounts);
+      LDAInference.InferredDocument doc = infer.infer(wordCounts);
       context.write(key, new VectorWritable(doc.getGamma().normalize(1)));
     } catch (ArrayIndexOutOfBoundsException e1) {
       throw new IllegalStateException(
@@ -32,17 +48,12 @@ public class LDADocumentTopicMapper exte
   }
 
   public void configure(LDAState myState) {
-    this.state = myState;
-    this.infer = new LDAInference(state);
+    this.infer = new LDAInference(myState);
   }
 
   public void configure(Configuration job) {
-    try {
-      LDAState myState = LDADriver.createState(job);
-      configure(myState);
-    } catch (IOException e) {
-      throw new IllegalStateException("Error creating LDA State!", e);
-    }
+    LDAState myState = LDADriver.createState(job);
+    configure(myState);
   }
 
   @Override

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/LDADriver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/LDADriver.java?rev=1101411&r1=1101410&r2=1101411&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/LDADriver.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/LDADriver.java Tue May 10 11:30:14 2011
@@ -50,9 +50,9 @@ import org.slf4j.LoggerFactory;
 
 import java.io.IOException;
 import java.util.Arrays;
+import java.util.Collection;
 import java.util.Iterator;
 import java.util.LinkedList;
-import java.util.List;
 import java.util.Random;
 
 /**
@@ -91,11 +91,11 @@ public final class LDADriver extends Abs
     ToolRunner.run(new Configuration(), new LDADriver(), args);
   }
 
-  public static LDAState createState(Configuration job) throws IOException {
+  public static LDAState createState(Configuration job) {
     return createState(job, false);
   }
 
-  public static LDAState createState(Configuration job, boolean empty) throws IOException {
+  public static LDAState createState(Configuration job, boolean empty) {
     String statePath = job.get(STATE_IN_KEY);
     int numTopics = Integer.parseInt(job.get(NUM_TOPICS_KEY));
     int numWords = Integer.parseInt(job.get(NUM_WORDS_KEY));
@@ -224,7 +224,9 @@ public final class LDADriver extends Abs
       conf.set(STATE_IN_KEY, stateIn.toString());
       // point the output to a new directory per iteration
       Path stateOut = new Path(output, "state-" + iteration);
-      double ll = runSequential ? runIterationSequential(conf, input, stateOut) : runIteration(conf, input, stateIn, stateOut);
+      double ll = runSequential
+          ? runIterationSequential(conf, input, stateOut)
+          : runIteration(conf, input, stateIn, stateOut);
       double relChange = (oldLL - ll) / oldLL;
 
       // now point the input to the old output directory
@@ -239,11 +241,17 @@ public final class LDADriver extends Abs
     if(runSequential) {
       computeDocumentTopicProbabilitiesSequential(conf, input, new Path(output, "docTopics"));
     } else {
-      computeDocumentTopicProbabilities(conf, input, stateIn, new Path(output, "docTopics"), numTopics, numWords, topicSmoothing);
+      computeDocumentTopicProbabilities(conf,
+                                        input,
+                                        stateIn,
+                                        new Path(output, "docTopics"),
+                                        numTopics,
+                                        numWords,
+                                        topicSmoothing);
     }
   }
 
-  private void writeInitialState(Path statePath, int numTopics, int numWords) throws IOException {
+  private static void writeInitialState(Path statePath, int numTopics, int numWords) throws IOException {
     Configuration job = new Configuration();
     FileSystem fs = statePath.getFileSystem(job);
 
@@ -272,7 +280,7 @@ public final class LDADriver extends Abs
     }
   }
 
-  private void writeState(Configuration job, LDAState state, Path statePath) throws IOException {
+  private static void writeState(Configuration job, LDAState state, Path statePath) throws IOException {
     FileSystem fs = statePath.getFileSystem(job);
     DoubleWritable v = new DoubleWritable();
 
@@ -298,7 +306,7 @@ public final class LDADriver extends Abs
     writer.close();
   }
 
-  private double findLL(Path statePath, Configuration job) throws IOException {
+  private static double findLL(Path statePath, Configuration job) throws IOException {
     FileSystem fs = statePath.getFileSystem(job);
     double ll = 0.0;
     for (FileStatus status : fs.globStatus(new Path(statePath, "part-*"))) {
@@ -317,20 +325,19 @@ public final class LDADriver extends Abs
     return ll;
   }
 
-  private double runIterationSequential(Configuration conf, Path input, Path stateOut)
-    throws IOException, InterruptedException {
+  private double runIterationSequential(Configuration conf, Path input, Path stateOut) throws IOException {
     if(state == null) {
       state = createState(conf);
     }
     if(trainingCorpus == null) {
       Class<? extends Writable> keyClass = peekAtSequenceFileForKeyType(conf, input);
-      List<Pair<Writable,VectorWritable>> corpus = new LinkedList<Pair<Writable, VectorWritable>>();
-      for(FileStatus fileStatus : FileSystem.get(conf).globStatus(new Path(input, "part-*"))) {
+      Collection<Pair<Writable, VectorWritable>> corpus = new LinkedList<Pair<Writable, VectorWritable>>();
+      for (FileStatus fileStatus : FileSystem.get(conf).globStatus(new Path(input, "part-*"))) {
         Path inputPart = fileStatus.getPath();
         SequenceFile.Reader reader = new SequenceFile.Reader(FileSystem.get(conf), inputPart, conf);
         Writable key = ReflectionUtils.newInstance(keyClass, conf);
         VectorWritable value = new VectorWritable();
-        while(reader.next(key, value)) {
+        while (reader.next(key, value)) {
           Writable nextKey = ReflectionUtils.newInstance(keyClass, conf);
           VectorWritable nextValue = new VectorWritable();
           corpus.add(new Pair<Writable,VectorWritable>(key, value));
@@ -340,11 +347,11 @@ public final class LDADriver extends Abs
       }
       trainingCorpus = corpus;
     }
-    if(inference == null) {
+    if (inference == null) {
       inference = new LDAInference(state);
     }
-    double ll = 0;
     newState = createState(conf, true);
+    double ll = 0.0;
     for(Pair<Writable, VectorWritable> slice : trainingCorpus) {
       LDAInference.InferredDocument doc;
       Vector wordCounts = slice.getSecond().get();
@@ -386,7 +393,7 @@ public final class LDADriver extends Abs
    * @param stateOut
    *          the directory pathname for output state
    */
-  private double runIteration(Configuration conf,
+  private static double runIteration(Configuration conf,
                                      Path input,
                                      Path stateIn,
                                      Path stateOut)
@@ -412,13 +419,13 @@ public final class LDADriver extends Abs
     return findLL(stateOut, conf);
   }
 
-  private void computeDocumentTopicProbabilities(Configuration conf,
-                                     Path input,
-                                     Path stateIn,
-                                     Path outputPath,
-                                     int numTopics,
-                                     int numWords,
-                                     double topicSmoothing)
+  private static void computeDocumentTopicProbabilities(Configuration conf,
+                                                        Path input,
+                                                        Path stateIn,
+                                                        Path outputPath,
+                                                        int numTopics,
+                                                        int numWords,
+                                                        double topicSmoothing)
     throws IOException, InterruptedException, ClassNotFoundException {
     conf.set(STATE_IN_KEY, stateIn.toString());
     conf.set(NUM_TOPICS_KEY, Integer.toString(numTopics));
@@ -437,25 +444,24 @@ public final class LDADriver extends Abs
     job.setInputFormatClass(SequenceFileInputFormat.class);
     job.setJarByClass(LDADriver.class);
 
-    if (job.waitForCompletion(true) == false) {
+    if (!job.waitForCompletion(true)) {
       throw new InterruptedException("LDA failed to compute and output document topic probabilities with: "+ stateIn);
     }
   }
 
   private void computeDocumentTopicProbabilitiesSequential(Configuration conf, Path input, Path outputPath)
-    throws IOException, ClassNotFoundException {
+    throws IOException {
     FileSystem fs = input.getFileSystem(conf);
     Class<? extends Writable> keyClass = peekAtSequenceFileForKeyType(conf, input);
     SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf, outputPath, keyClass, VectorWritable.class);
 
     Writable key = ReflectionUtils.newInstance(keyClass, conf);
-    VectorWritable vw = new VectorWritable();
+    Writable vw = new VectorWritable();
 
     for(Pair<Writable, VectorWritable> slice : trainingCorpus) {
-      LDAInference.InferredDocument doc;
       Vector wordCounts = slice.getSecond().get();
       try {
-        doc = inference.infer(wordCounts);
+        inference.infer(wordCounts);
       } catch (ArrayIndexOutOfBoundsException e1) {
         throw new IllegalStateException(
          "This is probably because the --numWords argument is set too small.  \n"
@@ -472,7 +478,7 @@ public final class LDADriver extends Abs
     try {
       SequenceFile.Reader reader = new SequenceFile.Reader(FileSystem.get(conf), input, conf);
       return (Class<? extends Writable>) reader.getKeyClass();
-    } catch(IOException ioe) {
+    } catch (IOException ioe) {
       return Text.class;
     }
   }

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/LDAWordTopicMapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/LDAWordTopicMapper.java?rev=1101411&r1=1101410&r2=1101411&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/LDAWordTopicMapper.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/LDAWordTopicMapper.java Tue May 10 11:30:14 2011
@@ -23,7 +23,6 @@ import java.util.Iterator;
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.io.DoubleWritable;
-import org.apache.hadoop.io.IntWritable;
 import org.apache.hadoop.io.WritableComparable;
 import org.apache.hadoop.mapreduce.Mapper;
 import org.apache.mahout.common.IntPairWritable;
@@ -94,12 +93,8 @@ public class LDAWordTopicMapper extends 
   }
   
   public void configure(Configuration job) {
-    try {
-      LDAState myState = LDADriver.createState(job);
-      configure(myState);
-    } catch (IOException e) {
-      throw new IllegalStateException("Error creating LDA State!", e);
-    }
+    LDAState myState = LDADriver.createState(job);
+    configure(myState);
   }
   
   @Override

Modified: mahout/trunk/core/src/test/java/org/apache/mahout/clustering/TestClusterClassifier.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/TestClusterClassifier.java?rev=1101411&r1=1101410&r2=1101411&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/clustering/TestClusterClassifier.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/clustering/TestClusterClassifier.java Tue May 10 11:30:14 2011
@@ -43,7 +43,7 @@ import org.junit.Test;
 
 public final class TestClusterClassifier extends MahoutTestCase {
   
-  private ClusterClassifier newDMClassifier() {
+  private static ClusterClassifier newDMClassifier() {
     List<Cluster> models = new ArrayList<Cluster>();
     DistanceMeasure measure = new ManhattanDistanceMeasure();
     models.add(new DistanceMeasureCluster(new DenseVector(2).assign(1), 0,
@@ -51,11 +51,10 @@ public final class TestClusterClassifier
     models.add(new DistanceMeasureCluster(new DenseVector(2), 1, measure));
     models.add(new DistanceMeasureCluster(new DenseVector(2).assign(-1), 2,
         measure));
-    ClusterClassifier classifier = new ClusterClassifier(models);
-    return classifier;
+    return new ClusterClassifier(models);
   }
   
-  private ClusterClassifier newClusterClassifier() {
+  private static ClusterClassifier newClusterClassifier() {
     List<Cluster> models = new ArrayList<Cluster>();
     DistanceMeasure measure = new ManhattanDistanceMeasure();
     models.add(new org.apache.mahout.clustering.kmeans.Cluster(new DenseVector(
@@ -64,21 +63,19 @@ public final class TestClusterClassifier
         2), 1, measure));
     models.add(new org.apache.mahout.clustering.kmeans.Cluster(new DenseVector(
         2).assign(-1), 2, measure));
-    ClusterClassifier classifier = new ClusterClassifier(models);
-    return classifier;
+    return new ClusterClassifier(models);
   }
   
-  private ClusterClassifier newSoftClusterClassifier() {
+  private static ClusterClassifier newSoftClusterClassifier() {
     List<Cluster> models = new ArrayList<Cluster>();
     DistanceMeasure measure = new ManhattanDistanceMeasure();
     models.add(new SoftCluster(new DenseVector(2).assign(1), 0, measure));
     models.add(new SoftCluster(new DenseVector(2), 1, measure));
     models.add(new SoftCluster(new DenseVector(2).assign(-1), 2, measure));
-    ClusterClassifier classifier = new ClusterClassifier(models);
-    return classifier;
+    return new ClusterClassifier(models);
   }
   
-  private ClusterClassifier newGaussianClassifier() {
+  private static ClusterClassifier newGaussianClassifier() {
     List<Cluster> models = new ArrayList<Cluster>();
     models.add(new GaussianCluster(new DenseVector(2).assign(1),
         new DenseVector(2).assign(1), 0));
@@ -86,8 +83,7 @@ public final class TestClusterClassifier
         .assign(1), 1));
     models.add(new GaussianCluster(new DenseVector(2).assign(-1),
         new DenseVector(2).assign(1), 2));
-    ClusterClassifier classifier = new ClusterClassifier(models);
-    return classifier;
+    return new ClusterClassifier(models);
   }
   
   private ClusterClassifier writeAndRead(ClusterClassifier classifier)
@@ -99,8 +95,10 @@ public final class TestClusterClassifier
     return readClassifier(config, path, fs);
   }
   
-  private void writeClassifier(ClusterClassifier classifier,
-      Configuration config, Path path, FileSystem fs) throws IOException {
+  private static void writeClassifier(ClusterClassifier classifier,
+                                      Configuration config,
+                                      Path path,
+                                      FileSystem fs) throws IOException {
     SequenceFile.Writer writer = new SequenceFile.Writer(fs, config, path,
         Text.class, ClusterClassifier.class);
     Writable key = new Text("test");
@@ -108,11 +106,11 @@ public final class TestClusterClassifier
     writer.close();
   }
   
-  private ClusterClassifier readClassifier(Configuration config, Path path,
-      FileSystem fs) throws IOException {
-    Writable key;
+  private static ClusterClassifier readClassifier(Configuration config,
+                                                  Path path,
+                                                  FileSystem fs) throws IOException {
     SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, config);
-    key = new Text();
+    Writable key = new Text();
     ClusterClassifier classifierOut = new ClusterClassifier();
     reader.next(key, classifierOut);
     reader.close();

Modified: mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/decomposer/TestDistributedLanczosSolverCLI.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/decomposer/TestDistributedLanczosSolverCLI.java?rev=1101411&r1=1101410&r2=1101411&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/decomposer/TestDistributedLanczosSolverCLI.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/decomposer/TestDistributedLanczosSolverCLI.java Tue May 10 11:30:14 2011
@@ -33,7 +33,7 @@ import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 import java.util.ArrayList;
-import java.util.List;
+import java.util.Collection;
 import java.util.Arrays;
 
 public final class TestDistributedLanczosSolverCLI extends MahoutTestCase {
@@ -110,8 +110,7 @@ public final class TestDistributedLanczo
   
     Path cleanEigenvectors = new Path(output, EigenVerificationJob.CLEAN_EIGENVECTORS);
     Matrix eigenVectors = new DenseMatrix(30, corpus.numCols());
-    Configuration conf = new Configuration();
-    List<Double> eigenvalues = new ArrayList<Double>();
+    Collection<Double> eigenvalues = new ArrayList<Double>();
 
     output = getTestTempDirPath("output2");
     tmp = getTestTempDirPath("tmp2");
@@ -128,8 +127,8 @@ public final class TestDistributedLanczo
     new DistributedLanczosSolver().new DistributedLanczosSolverJob().run(args);
     Path cleanEigenvectors2 = new Path(output, EigenVerificationJob.CLEAN_EIGENVECTORS);
     Matrix eigenVectors2 = new DenseMatrix(35, corpus.numCols());
-    conf = new Configuration();
-    List<Double> newEigenValues = new ArrayList<Double>();
+    Configuration conf = new Configuration();
+    Collection<Double> newEigenValues = new ArrayList<Double>();
 
     int i = 0;
     for (VectorWritable value : new SequenceFileValueIterable<VectorWritable>(cleanEigenvectors, conf)) {
@@ -152,7 +151,7 @@ public final class TestDistributedLanczo
       i++;
     }
 
-    List<Integer> oldEigensFound = new ArrayList<Integer>();
+    Collection<Integer> oldEigensFound = new ArrayList<Integer>();
     for(int row = 0; row < eigenVectors.numRows(); row++) {
       Vector oldEigen = eigenVectors.getRow(row);
       if(oldEigen == null) {
@@ -170,7 +169,7 @@ public final class TestDistributedLanczo
     }
     assertEquals("the number of new eigenvectors", 30, i);
 
-    List<Double> oldEigenValuesNotFound = new ArrayList<Double>();
+    Collection<Double> oldEigenValuesNotFound = new ArrayList<Double>();
     for(double d : eigenvalues) {
       boolean found = false;
       for(double newD : newEigenValues) {
@@ -183,7 +182,7 @@ public final class TestDistributedLanczo
       }
     }
     assertEquals("number of old eigenvalues not found: "
-                 + Arrays.toString(oldEigenValuesNotFound.toArray(new Double[0])),
+                 + Arrays.toString(oldEigenValuesNotFound.toArray(new Double[oldEigenValuesNotFound.size()])),
                 0, oldEigenValuesNotFound.size());
     assertEquals("did not find enough old eigenvectors", 16, oldEigensFound.size());
   }

Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayClustering.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayClustering.java?rev=1101411&r1=1101410&r2=1101411&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayClustering.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayClustering.java Tue May 10 11:30:14 2011
@@ -300,25 +300,21 @@ public class DisplayClustering extends F
     return (double) cluster.getNumPoints() / SAMPLE_DATA.size() > significance;
   }
 
-  protected static ClusterClassifier readClassifier(Configuration config, Path path)
-      throws IOException {
-        Writable key;
-        SequenceFile.Reader reader = new SequenceFile.Reader(
-            FileSystem.get(config), path, config);
-        key = new Text();
-        ClusterClassifier classifierOut = new ClusterClassifier();
-        reader.next(key, classifierOut);
-        reader.close();
-        return classifierOut;
-      }
+  protected static ClusterClassifier readClassifier(Configuration config, Path path) throws IOException {
+    SequenceFile.Reader reader = new SequenceFile.Reader(FileSystem.get(config), path, config);
+    Writable key = new Text();
+    ClusterClassifier classifierOut = new ClusterClassifier();
+    reader.next(key, classifierOut);
+    reader.close();
+    return classifierOut;
+  }
 
   protected static void writeClassifier(ClusterClassifier classifier, Configuration config, Path path)
-      throws IOException {
-        SequenceFile.Writer writer = new SequenceFile.Writer(
-            FileSystem.get(config), config, path, Text.class,
-            ClusterClassifier.class);
-        Writable key = new Text("test");
-        writer.append(key, classifier);
-        writer.close();
-      }
+    throws IOException {
+    SequenceFile.Writer writer =
+        new SequenceFile.Writer(FileSystem.get(config), config, path, Text.class, ClusterClassifier.class);
+    Writable key = new Text("test");
+    writer.append(key, classifier);
+    writer.close();
+  }
 }

Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayDirichlet.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayDirichlet.java?rev=1101411&r1=1101410&r2=1101411&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayDirichlet.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayDirichlet.java Tue May 10 11:30:14 2011
@@ -42,8 +42,7 @@ import org.slf4j.LoggerFactory;
 
 public class DisplayDirichlet extends DisplayClustering {
   
-  private static final Logger log = LoggerFactory
-      .getLogger(DisplayDirichlet.class);
+  private static final Logger log = LoggerFactory.getLogger(DisplayDirichlet.class);
   
   public DisplayDirichlet() {
     initialize();
@@ -66,8 +65,7 @@ public class DisplayDirichlet extends Di
       for (int k = 0; k < r.length; k++) {
         Cluster model = r[k];
         if (model.count() > significant) {
-          models.append('m').append(k).append(model.asFormatString(null))
-              .append(", ");
+          models.append('m').append(k).append(model.asFormatString(null)).append(", ");
         }
       }
       models.append('\n');
@@ -76,22 +74,23 @@ public class DisplayDirichlet extends Di
     log.info(models.toString());
   }
   
-  protected static void generateResults(
-      ModelDistribution<VectorWritable> modelDist, int numClusters,
-      int numIterations, double alpha0, int thin, int burnin)
-      throws IOException {
+  protected static void generateResults(ModelDistribution<VectorWritable> modelDist,
+                                        int numClusters,
+                                        int numIterations,
+                                        double alpha0,
+                                        int thin,
+                                        int burnin) throws IOException {
     boolean runClusterer = false;
     if (runClusterer) {
-      runSequentialDirichletClusterer(modelDist, numClusters, numIterations, alpha0,
-          thin, burnin);
+      runSequentialDirichletClusterer(modelDist, numClusters, numIterations, alpha0, thin, burnin);
     } else {
       runSequentialDirichletClassifier(modelDist, numClusters, numIterations);
     }
   }
   
-  private static void runSequentialDirichletClassifier(
-      ModelDistribution<VectorWritable> modelDist, int numClusters,
-      int numIterations) throws IOException {
+  private static void runSequentialDirichletClassifier(ModelDistribution<VectorWritable> modelDist,
+                                                       int numClusters,
+                                                       int numIterations) throws IOException {
     List<Cluster> models = new ArrayList<Cluster>();
     for (Model<VectorWritable> cluster : modelDist.sampleFromPrior(numClusters)) {
       models.add((Cluster) cluster);
@@ -103,13 +102,10 @@ public class DisplayDirichlet extends Di
     Configuration conf = new Configuration();
     writeClassifier(prior, conf, priorClassifier);
     
-    ClusteringPolicy policy = new DirichletClusteringPolicy(numClusters,
-        numIterations);
-    new ClusterIterator(policy).iterate(samples, priorClassifier, output,
-        numIterations);
+    ClusteringPolicy policy = new DirichletClusteringPolicy(numClusters, numIterations);
+    new ClusterIterator(policy).iterate(samples, priorClassifier, output, numIterations);
     for (int i = 1; i <= numIterations; i++) {
-      ClusterClassifier posterior = readClassifier(conf, new Path(output,
-          "classifier-" + i));
+      ClusterClassifier posterior = readClassifier(conf, new Path(output, "classifier-" + i));
       List<Cluster> clusters = new ArrayList<Cluster>();    
       for (Cluster cluster : posterior.getModels()) {
         if (isSignificant(cluster)) {
@@ -120,11 +116,13 @@ public class DisplayDirichlet extends Di
     }
   }
   
-  private static void runSequentialDirichletClusterer(
-      ModelDistribution<VectorWritable> modelDist, int numClusters,
-      int numIterations, double alpha0, int thin, int burnin) {
-    DirichletClusterer dc = new DirichletClusterer(SAMPLE_DATA, modelDist,
-        alpha0, numClusters, thin, burnin);
+  private static void runSequentialDirichletClusterer(ModelDistribution<VectorWritable> modelDist,
+                                                      int numClusters,
+                                                      int numIterations,
+                                                      double alpha0,
+                                                      int thin,
+                                                      int burnin) {
+    DirichletClusterer dc = new DirichletClusterer(SAMPLE_DATA, modelDist, alpha0, numClusters, thin, burnin);
     List<Cluster[]> result = dc.cluster(numIterations);
     printModels(result, burnin);
     for (Cluster[] models : result) {
@@ -140,9 +138,7 @@ public class DisplayDirichlet extends Di
   
   public static void main(String[] args) throws Exception {
     VectorWritable modelPrototype = new VectorWritable(new DenseVector(2));
-    ModelDistribution<VectorWritable> modelDist = new GaussianClusterDistribution(
-        modelPrototype);
-    
+    ModelDistribution<VectorWritable> modelDist = new GaussianClusterDistribution(modelPrototype);
     RandomUtils.useTestSeed();
     generateSamples();
     int numIterations = 20;

Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayFuzzyKMeans.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayFuzzyKMeans.java?rev=1101411&r1=1101410&r2=1101411&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayFuzzyKMeans.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayFuzzyKMeans.java Tue May 10 11:30:14 2011
@@ -21,6 +21,7 @@ import java.awt.Graphics;
 import java.awt.Graphics2D;
 import java.io.IOException;
 import java.util.ArrayList;
+import java.util.Collection;
 import java.util.List;
 
 import org.apache.hadoop.conf.Configuration;
@@ -59,8 +60,6 @@ class DisplayFuzzyKMeans extends Display
     
     Path samples = new Path("samples");
     Path output = new Path("output");
-    int numClusters = 3;
-    int maxIterations = 10;
     Configuration conf = new Configuration();
     HadoopUtil.delete(conf, samples);
     HadoopUtil.delete(conf, output);
@@ -68,20 +67,23 @@ class DisplayFuzzyKMeans extends Display
     DisplayClustering.generateSamples();
     writeSampleData(samples);
     boolean runClusterer = false;
+    int maxIterations = 10;
     if (runClusterer) {
-      runSequentialFuzzyKClusterer(conf, samples, output, measure, numClusters,
-          maxIterations);
+      runSequentialFuzzyKClusterer(conf, samples, output, measure, maxIterations);
     } else {
-      runSequentialFuzzyKClassifier(conf, samples, output, measure,
-          numClusters, maxIterations);
+      int numClusters = 3;
+      runSequentialFuzzyKClassifier(conf, samples, output, measure, numClusters, maxIterations);
     }
     new DisplayFuzzyKMeans();
   }
   
   private static void runSequentialFuzzyKClassifier(Configuration conf,
-      Path samples, Path output, DistanceMeasure measure, int numClusters,
-      int maxIterations) throws IOException {
-    List<Vector> points = new ArrayList<Vector>();
+                                                    Path samples,
+                                                    Path output,
+                                                    DistanceMeasure measure,
+                                                    int numClusters,
+                                                    int maxIterations) throws IOException {
+    Collection<Vector> points = new ArrayList<Vector>();
     for (int i = 0; i < numClusters; i++) {
       points.add(SAMPLE_DATA.get(i).get());
     }
@@ -95,18 +97,19 @@ class DisplayFuzzyKMeans extends Display
     writeClassifier(prior, conf, priorClassifier);
     
     ClusteringPolicy policy = new FuzzyKMeansClusteringPolicy();
-    new ClusterIterator(policy).iterate(samples, priorClassifier, output,
-        maxIterations);
+    new ClusterIterator(policy).iterate(samples, priorClassifier, output, maxIterations);
     for (int i = 1; i <= maxIterations; i++) {
-      ClusterClassifier posterior = readClassifier(conf, new Path(output,
-          "classifier-" + i));
+      ClusterClassifier posterior = readClassifier(conf, new Path(output, "classifier-" + i));
       CLUSTERS.add(posterior.getModels());
     }
   }
   
-  private static void runSequentialFuzzyKClusterer(Configuration conf, Path samples,
-      Path output, DistanceMeasure measure, int numClusters, int maxIterations)
-      throws IOException, ClassNotFoundException, InterruptedException {
+  private static void runSequentialFuzzyKClusterer(Configuration conf,
+                                                   Path samples,
+                                                   Path output,
+                                                   DistanceMeasure measure,
+                                                   int maxIterations)
+    throws IOException, ClassNotFoundException, InterruptedException {
     Path clusters = RandomSeedGenerator.buildRandom(conf, samples, new Path(
         output, "clusters-0"), 3, measure);
     double threshold = 0.001;

Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayKMeans.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayKMeans.java?rev=1101411&r1=1101410&r2=1101411&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayKMeans.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayKMeans.java Tue May 10 11:30:14 2011
@@ -21,6 +21,7 @@ import java.awt.Graphics;
 import java.awt.Graphics2D;
 import java.io.IOException;
 import java.util.ArrayList;
+import java.util.Collection;
 import java.util.List;
 
 import org.apache.hadoop.conf.Configuration;
@@ -39,13 +40,10 @@ import org.apache.mahout.common.distance
 import org.apache.mahout.math.Vector;
 
 class DisplayKMeans extends DisplayClustering {
-  
-  // static List<List<Cluster>> result;
-  
+
   DisplayKMeans() {
     initialize();
-    this.setTitle("k-Means Clusters (>" + (int) (significance * 100)
-        + "% of population)");
+    this.setTitle("k-Means Clusters (>" + (int) (significance * 100) + "% of population)");
   }
   
   public static void main(String[] args) throws Exception {
@@ -53,8 +51,6 @@ class DisplayKMeans extends DisplayClust
     Path samples = new Path("samples");
     Path output = new Path("output");
     Configuration conf = new Configuration();
-    int numClusters = 3;
-    int maxIterations = 10;
     HadoopUtil.delete(conf, samples);
     HadoopUtil.delete(conf, output);
     
@@ -63,19 +59,21 @@ class DisplayKMeans extends DisplayClust
     writeSampleData(samples);
     boolean runClusterer = false;
     if (runClusterer) {
-      runSequentialKMeansClusterer(conf, samples, output, measure, numClusters,
-          maxIterations);
+      int maxIterations = 10;
+      runSequentialKMeansClusterer(conf, samples, output, measure, maxIterations);
     } else {
-      runSequentialKMeansClassifier(conf, samples, output, measure,
-          numClusters, maxIterations);
+      int numClusters = 3;
+      runSequentialKMeansClassifier(conf, samples, output, measure, numClusters);
     }
     new DisplayKMeans();
   }
   
   private static void runSequentialKMeansClassifier(Configuration conf,
-      Path samples, Path output, DistanceMeasure measure, int numClusters,
-      int maxIterations) throws IOException {
-    List<Vector> points = new ArrayList<Vector>();
+                                                    Path samples,
+                                                    Path output,
+                                                    DistanceMeasure measure,
+                                                    int numClusters) throws IOException {
+    Collection<Vector> points = new ArrayList<Vector>();
     for (int i = 0; i < numClusters; i++) {
       points.add(SAMPLE_DATA.get(i).get());
     }
@@ -91,18 +89,19 @@ class DisplayKMeans extends DisplayClust
     
     int maxIter = 10;
     ClusteringPolicy policy = new KMeansClusteringPolicy();
-    new ClusterIterator(policy).iterate(samples, priorClassifier, output,
-        maxIter);
+    new ClusterIterator(policy).iterate(samples, priorClassifier, output, maxIter);
     for (int i = 1; i <= maxIter; i++) {
-      ClusterClassifier posterior = readClassifier(conf, new Path(output,
-          "classifier-" + i));
+      ClusterClassifier posterior = readClassifier(conf, new Path(output, "classifier-" + i));
       CLUSTERS.add(posterior.getModels());
     }
   }
   
-  private static void runSequentialKMeansClusterer(Configuration conf, Path samples,
-      Path output, DistanceMeasure measure, int numClusters, int maxIterations)
-      throws IOException, InterruptedException, ClassNotFoundException {
+  private static void runSequentialKMeansClusterer(Configuration conf,
+                                                   Path samples,
+                                                   Path output,
+                                                   DistanceMeasure measure,
+                                                   int maxIterations)
+    throws IOException, InterruptedException, ClassNotFoundException {
     Path clusters = RandomSeedGenerator.buildRandom(conf, samples, new Path(
         output, "clusters-0"), 3, measure);
     double distanceThreshold = 0.001;