You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by sr...@apache.org on 2012/06/20 14:07:58 UTC

svn commit: r1352052 [3/7] - in /mahout/trunk: ./ buildtools/ buildtools/src/main/resources/ core/ core/src/main/java/org/apache/mahout/cf/taste/hadoop/ core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/ core/src/main/java/org/apache/mahout/cf/t...

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/CVB0Driver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/CVB0Driver.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/CVB0Driver.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/CVB0Driver.java Wed Jun 20 12:07:50 2012
@@ -44,7 +44,6 @@ import org.apache.mahout.common.iterator
 import org.apache.mahout.common.iterator.sequencefile.SequenceFileDirIterable;
 import org.apache.mahout.common.mapreduce.VectorSumReducer;
 import org.apache.mahout.math.VectorWritable;
-import org.apache.mahout.vectorizer.SparseVectorsFromSequenceFiles;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -64,7 +63,8 @@ import java.util.List;
  * <dl>
  * <dt>{@code --input path}</dt>
  * <dd>Input path for {@code SequenceFile<IntWritable, VectorWritable>} document vectors. See
- * {@link SparseVectorsFromSequenceFiles} for details on how to generate this input format.</dd>
+ * {@link org.apache.mahout.vectorizer.SparseVectorsFromSequenceFiles}
+ *  for details on how to generate this input format.</dd>
  * <dt>{@code --dictionary path}</dt>
  * <dd>Path to dictionary file(s) generated during construction of input document vectors (glob
  * expression supported). If set, this data is scanned to determine an appropriate value for option
@@ -224,7 +224,7 @@ public class CVB0Driver extends Abstract
                         int maxItersPerDoc,
                         int numReduceTasks,
                         boolean backfillPerplexity)
-      throws ClassNotFoundException, IOException, InterruptedException {
+    throws ClassNotFoundException, IOException, InterruptedException {
     // verify arguments
     Preconditions.checkArgument(testFraction >= 0.0 && testFraction <= 1.0,
         "Expected 'testFraction' value in range [0, 1] but found value '%s'", testFraction);
@@ -314,7 +314,7 @@ public class CVB0Driver extends Abstract
       }
     }
     log.info("Completed {} iterations in {} seconds", iterationNumber,
-        (System.currentTimeMillis() - startTime)/1000);
+        (System.currentTimeMillis() - startTime) / 1000);
     log.info("Perplexities: ({})", Joiner.on(", ").join(perplexities));
 
     // write final topic-term and doc-topic distributions
@@ -343,8 +343,7 @@ public class CVB0Driver extends Abstract
   }
 
   private static double calculatePerplexity(Configuration conf, Path corpusPath, Path modelPath, int iteration)
-      throws IOException,
-      ClassNotFoundException, InterruptedException {
+    throws IOException, ClassNotFoundException, InterruptedException {
     String jobName = "Calculating perplexity for " + modelPath;
     log.info("About to run: " + jobName);
     Job job = new Job(conf, jobName);
@@ -402,7 +401,7 @@ public class CVB0Driver extends Abstract
    * @throws IOException
    */
   public static double readPerplexity(Configuration conf, Path topicModelStateTemp, int iteration)
-      throws IOException {
+    throws IOException {
     Path perplexityPath = perplexityPath(topicModelStateTemp, iteration);
     FileSystem fs = FileSystem.get(perplexityPath.toUri(), conf);
     if (!fs.exists(perplexityPath)) {
@@ -423,10 +422,9 @@ public class CVB0Driver extends Abstract
     return perplexity / modelWeight;
   }
 
-  private static Job writeTopicModel(Configuration conf, Path modelInput, Path output) throws IOException,
-      InterruptedException, ClassNotFoundException {
-    String jobName = String.format("Writing final topic/term distributions from %s to %s", modelInput,
-        output);
+  private static Job writeTopicModel(Configuration conf, Path modelInput, Path output)
+    throws IOException, InterruptedException, ClassNotFoundException {
+    String jobName = String.format("Writing final topic/term distributions from %s to %s", modelInput, output);
     log.info("About to run: " + jobName);
     Job job = new Job(conf, jobName);
     job.setJarByClass(CVB0Driver.class);
@@ -443,9 +441,8 @@ public class CVB0Driver extends Abstract
   }
 
   private static Job writeDocTopicInference(Configuration conf, Path corpus, Path modelInput, Path output)
-      throws IOException, ClassNotFoundException, InterruptedException {
-    String jobName = String.format("Writing final document/topic inference from %s to %s", corpus,
-        output);
+    throws IOException, ClassNotFoundException, InterruptedException {
+    String jobName = String.format("Writing final document/topic inference from %s to %s", corpus, output);
     log.info("About to run: " + jobName);
     Job job = new Job(conf, jobName);
     job.setMapperClass(CVB0DocInferenceMapper.class);
@@ -483,7 +480,7 @@ public class CVB0Driver extends Abstract
   }
 
   private static int getCurrentIterationNumber(Configuration config, Path modelTempDir, int maxIterations)
-      throws IOException {
+    throws IOException {
     FileSystem fs = FileSystem.get(modelTempDir.toUri(), config);
     int iterationNumber = 1;
     Path iterationPath = modelPath(modelTempDir, iterationNumber);
@@ -496,7 +493,8 @@ public class CVB0Driver extends Abstract
   }
 
   public static void runIteration(Configuration conf, Path corpusInput, Path modelInput, Path modelOutput,
-                                  int iterationNumber, int maxIterations, int numReduceTasks) throws IOException, ClassNotFoundException, InterruptedException {
+                                  int iterationNumber, int maxIterations, int numReduceTasks)
+    throws IOException, ClassNotFoundException, InterruptedException {
     String jobName = String.format("Iteration %d of %d, input path: %s",
         iterationNumber, maxIterations, modelInput);
     log.info("About to run: " + jobName);

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/CachingCVB0PerplexityMapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/CachingCVB0PerplexityMapper.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/CachingCVB0PerplexityMapper.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/CachingCVB0PerplexityMapper.java Wed Jun 20 12:07:50 2012
@@ -96,7 +96,7 @@ public class CachingCVB0PerplexityMapper
   @Override
   public void map(IntWritable docId, VectorWritable document, Context context)
       throws IOException, InterruptedException{
-    if (1 > testFraction && random.nextFloat() >= testFraction) {
+    if (testFraction < 1.0f && random.nextFloat() >= testFraction) {
       return;
     }
     context.getCounter(Counters.SAMPLED_DOCUMENTS).increment(1);

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/InMemoryCollapsedVariationalBayes0.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/InMemoryCollapsedVariationalBayes0.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/InMemoryCollapsedVariationalBayes0.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/InMemoryCollapsedVariationalBayes0.java Wed Jun 20 12:07:50 2012
@@ -71,19 +71,14 @@ public class InMemoryCollapsedVariationa
   private int numDocuments;
   private double alpha;
   private double eta;
-  private int minDfCt;
-  private double maxDfPct;
+  //private int minDfCt;
+  //private double maxDfPct;
   private boolean verbose = false;
-
-  private Map<String, Integer> termIdMap;
   private String[] terms;  // of length numTerms;
   private Matrix corpusWeights; // length numDocs;
   private double totalCorpusWeight;
   private double initialModelCorpusFraction;
   private Matrix docTopicCounts;
-  private long seed;
-  private TopicModel topicModel;
-  private TopicModel updatedModel;
   private int numTrainingThreads;
   private int numUpdatingThreads;
   private ModelTrainer modelTrainer;
@@ -96,26 +91,34 @@ public class InMemoryCollapsedVariationa
     this.verbose = verbose;
   }
 
-  public InMemoryCollapsedVariationalBayes0(Matrix corpus, String[] terms, int numTopics,
-      double alpha, double eta) {
-    this(corpus, terms, numTopics, alpha, eta, 1, 1, 0, 1234);
+  public InMemoryCollapsedVariationalBayes0(Matrix corpus,
+                                            String[] terms,
+                                            int numTopics,
+                                            double alpha,
+                                            double eta) {
+    this(corpus, terms, numTopics, alpha, eta, 1, 1, 0);
   }
     
-  public InMemoryCollapsedVariationalBayes0(Matrix corpus, String[] terms, int numTopics,
-      double alpha, double eta, int numTrainingThreads, int numUpdatingThreads,
-      double modelCorpusFraction, long seed) {
-    this.seed = seed;
+  public InMemoryCollapsedVariationalBayes0(Matrix corpus,
+                                            String[] terms,
+                                            int numTopics,
+                                            double alpha,
+                                            double eta,
+                                            int numTrainingThreads,
+                                            int numUpdatingThreads,
+                                            double modelCorpusFraction) {
+    //this.seed = seed;
     this.numTopics = numTopics;
     this.alpha = alpha;
     this.eta = eta;
-    this.minDfCt = 0;
-    this.maxDfPct = 1.0f;
+    //this.minDfCt = 0;
+    //this.maxDfPct = 1.0f;
     corpusWeights = corpus;
     numDocuments = corpus.numRows();
     this.terms = terms;
     this.initialModelCorpusFraction = modelCorpusFraction;
     numTerms = terms != null ? terms.length : corpus.numCols();
-    termIdMap = Maps.newHashMap();
+    Map<String, Integer> termIdMap = Maps.newHashMap();
     if (terms != null) {
       for (int t=0; t<terms.length; t++) {
         termIdMap.put(terms[t], t);
@@ -143,12 +146,12 @@ public class InMemoryCollapsedVariationa
   }
 
   private void initializeModel() {
-    topicModel = new TopicModel(numTopics, numTerms, eta, alpha, RandomUtils.getRandom(), terms,
-        numUpdatingThreads,
-        initialModelCorpusFraction == 0 ? 1 : initialModelCorpusFraction * totalCorpusWeight);
+    TopicModel topicModel = new TopicModel(numTopics, numTerms, eta, alpha, RandomUtils.getRandom(), terms,
+                                           numUpdatingThreads,
+                                           initialModelCorpusFraction == 0 ? 1 : initialModelCorpusFraction * totalCorpusWeight);
     topicModel.setConf(getConf());
 
-    updatedModel = initialModelCorpusFraction == 0
+    TopicModel updatedModel = initialModelCorpusFraction == 0
         ? new TopicModel(numTopics, numTerms, eta, alpha, null, terms, numUpdatingThreads, 1)
         : topicModel;
     updatedModel.setConf(getConf());
@@ -157,6 +160,7 @@ public class InMemoryCollapsedVariationa
     modelTrainer = new ModelTrainer(topicModel, updatedModel, numTrainingThreads, numTopics, numTerms);
   }
 
+  /*
   private void inferDocuments(double convergence, int maxIter, boolean recalculate) {
     for (int docId = 0; docId < corpusWeights.numRows() ; docId++) {
       Vector inferredDocument = topicModel.infer(corpusWeights.viewRow(docId),
@@ -164,6 +168,7 @@ public class InMemoryCollapsedVariationa
       // do what now?
     }
   }
+   */
 
   public void trainDocuments() {
     trainDocuments(0);
@@ -372,7 +377,7 @@ public class InMemoryCollapsedVariationa
       int numUpdateThreads = Integer.parseInt((String)cmdLine.getValue(numUpdateThreadsOpt));
       String topicOutFile = (String)cmdLine.getValue(outputTopicFileOpt);
       String docOutFile = (String)cmdLine.getValue(outputDocFileOpt);
-      String reInferDocTopics = (String)cmdLine.getValue(reInferDocTopicsOpt);
+      //String reInferDocTopics = (String)cmdLine.getValue(reInferDocTopicsOpt);
       boolean verbose = Boolean.parseBoolean((String) cmdLine.getValue(verboseOpt));
       double modelCorpusFraction = (Double) cmdLine.getValue(modelCorpusFractionOption);
 
@@ -390,7 +395,7 @@ public class InMemoryCollapsedVariationa
       start = System.nanoTime();
       InMemoryCollapsedVariationalBayes0 cvb0 =
           new InMemoryCollapsedVariationalBayes0(corpus, terms, numTopics, alpha, eta,
-                                                 numTrainThreads, numUpdateThreads, modelCorpusFraction, 1234);
+                                                 numTrainThreads, numUpdateThreads, modelCorpusFraction);
       logTime("cvb0 init", System.nanoTime() - start);
 
       start = System.nanoTime();
@@ -398,11 +403,13 @@ public class InMemoryCollapsedVariationa
       cvb0.iterateUntilConvergence(minFractionalErrorChange, maxIterations, burnInIterations);
       logTime("total training time", System.nanoTime() - start);
 
+      /*
       if ("randstart".equalsIgnoreCase(reInferDocTopics)) {
         cvb0.inferDocuments(0.0, 100, true);
       } else if ("continue".equalsIgnoreCase(reInferDocTopics)) {
         cvb0.inferDocuments(0.0, 100, false);
       }
+       */
 
       start = System.nanoTime();
       cvb0.writeModel(new Path(topicOutFile));

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/TopicModel.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/TopicModel.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/TopicModel.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/TopicModel.java Wed Jun 20 12:07:50 2012
@@ -480,7 +480,8 @@ public class TopicModel implements Confi
       }
     }
 
-    @Override public void run() {
+    @Override
+    public void run() {
       while (!shutdown) {
         try {
           Pair<Integer, Vector> pair = queue.poll(1, TimeUnit.SECONDS);

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/eigencuts/EigencutsDriver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/eigencuts/EigencutsDriver.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/eigencuts/EigencutsDriver.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/eigencuts/EigencutsDriver.java Wed Jun 20 12:07:50 2012
@@ -130,7 +130,7 @@ public class EigencutsDriver extends Abs
       // eigendecomposition (step 3)
       int overshoot = (int) ((double) eigenrank * OVERSHOOT_MULTIPLIER);
       LanczosState state = new LanczosState(L, eigenrank,
-          new DistributedLanczosSolver().getInitialVector(L));
+          DistributedLanczosSolver.getInitialVector(L));
 
       DistributedRowMatrix U = performEigenDecomposition(conf, L, state, eigenrank, overshoot, outputCalc);
       U.setConf(new Configuration(conf));

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/kmeans/SpectralKMeansDriver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/kmeans/SpectralKMeansDriver.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/kmeans/SpectralKMeansDriver.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/kmeans/SpectralKMeansDriver.java Wed Jun 20 12:07:50 2012
@@ -54,8 +54,7 @@ public class SpectralKMeansDriver extend
   }
 
   @Override
-  public int run(String[] arg0)
-    throws IOException, ClassNotFoundException, InstantiationException, IllegalAccessException, InterruptedException {
+  public int run(String[] arg0) throws IOException, ClassNotFoundException, InterruptedException {
     // set up command line options
     Configuration conf = getConf();
     addInputOption();
@@ -143,7 +142,7 @@ public class SpectralKMeansDriver extend
     // unnecessary vectors later
     int overshoot = (int) ((double) clusters * OVERSHOOT_MULTIPLIER);
     DistributedLanczosSolver solver = new DistributedLanczosSolver();
-    LanczosState state = new LanczosState(L, clusters, solver.getInitialVector(L));
+    LanczosState state = new LanczosState(L, clusters, DistributedLanczosSolver.getInitialVector(L));
     Path lanczosSeqFiles = new Path(outputCalc, "eigenvectors-" + (System.nanoTime() & 0xFF));
     solver.runJob(conf,
                   state,

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/topdown/postprocessor/ClusterOutputPostProcessor.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/topdown/postprocessor/ClusterOutputPostProcessor.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/topdown/postprocessor/ClusterOutputPostProcessor.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/topdown/postprocessor/ClusterOutputPostProcessor.java Wed Jun 20 12:07:50 2012
@@ -91,10 +91,9 @@ public final class ClusterOutputPostProc
    * Creates the directory to put post processed clusters.
    */
   private void createPostProcessDirectory() throws IOException {
-    if (!fileSystem.exists(clusterPostProcessorOutput)) {
-      if (!fileSystem.mkdirs(clusterPostProcessorOutput)) {
-        throw new IOException("Error creating cluster post processor directory");
-      }
+    if (!fileSystem.exists(clusterPostProcessorOutput) &&
+        !fileSystem.mkdirs(clusterPostProcessorOutput)) {
+      throw new IOException("Error creating cluster post processor directory");
     }
   }
   

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/common/AbstractJob.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/common/AbstractJob.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/common/AbstractJob.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/common/AbstractJob.java Wed Jun 20 12:07:50 2012
@@ -232,8 +232,6 @@ public abstract class AbstractJob extend
     this.outputOption = addOption(DefaultOptionCreator.outputOption().create());
   }
 
-
-
   /** Build an option with the given parameters. Name and description are
    *  required.
    * 
@@ -246,21 +244,21 @@ public abstract class AbstractJob extend
    * @return the option.
    */
   protected static Option buildOption(String name,
-                                    String shortName,
-                                    String description,
-                                    boolean hasArg,
-                                    boolean required,
-                                    String defaultValue) {
+                                      String shortName,
+                                      String description,
+                                      boolean hasArg,
+                                      boolean required,
+                                      String defaultValue) {
 
     return buildOption(name, shortName, description, hasArg, 1, 1, required, defaultValue);
   }
 
   protected static Option buildOption(String name,
-                                    String shortName,
-                                    String description,
-                                    boolean hasArg, int min, int max,
-                                    boolean required,
-                                    String defaultValue) {
+                                      String shortName,
+                                      String description,
+                                      boolean hasArg, int min, int max,
+                                      boolean required,
+                                      String defaultValue) {
 
     DefaultOptionBuilder optBuilder = new DefaultOptionBuilder().withLongName(name).withDescription(description)
         .withRequired(required);
@@ -281,10 +279,8 @@ public abstract class AbstractJob extend
 
     return optBuilder.create();
   }
-  //convenience method
 
   /**
-   *
    * @param name The name of the option
    * @return the {@link org.apache.commons.cli2.Option} with the name, else null
    */
@@ -311,7 +307,6 @@ public abstract class AbstractJob extend
    *  names used for keys are the option name parameter prefixed by '--'.
    *
    * @see #parseArguments(String[], boolean, boolean)  -- passes in false, false for the optional args.
-   *
    */
   public Map<String, List<String>> parseArguments(String[] args) throws IOException {
     return parseArguments(args, false, false);
@@ -323,9 +318,9 @@ public abstract class AbstractJob extend
   * @param inputOptional if true, then the input option, if set, need not be present.  If false and input is an option and there is no input, then throw an error
   * @param outputOptional if true, then the output option, if set, need not be present.  If false and output is an option and there is no output, then throw an error
    * @return the args parsed into a map.
-   * @throws IOException
    */
-  public Map<String, List<String>> parseArguments(String[] args, boolean inputOptional, boolean outputOptional) throws IOException{
+  public Map<String, List<String>> parseArguments(String[] args, boolean inputOptional, boolean outputOptional)
+    throws IOException {
     Option helpOpt = addOption(DefaultOptionCreator.helpOption());
     addOption("tempDir", null, "Intermediate output directory", "temp");
     addOption("startPhase", null, "First phase to run", "0");
@@ -388,7 +383,7 @@ public abstract class AbstractJob extend
    */
   public String getOption(String optionName) {
     List<String> list = argMap.get(keyFor(optionName));
-    if (list != null && list.isEmpty() == false) {
+    if (list != null && !list.isEmpty()) {
       return list.get(0);
     }
     return null;
@@ -411,7 +406,8 @@ public abstract class AbstractJob extend
   /**
    * Options can occur multiple times, so return the list
    * @param optionName The unadorned (no "--" prefixing it) option name
-   * @return The values, else null.  If the option is present, but has no values, then the result will be an empty list (Collections.emptyList())
+   * @return The values, else null.  If the option is present, but has no values, then the result will be an
+   * empty list (Collections.emptyList())
    */
   public List<String> getOptions(String optionName) {
     return argMap.get(keyFor(optionName));
@@ -431,13 +427,14 @@ public abstract class AbstractJob extend
    * @param matrix
    * @return the cardinality of the vector
    */
-  public int getDimensions(Path matrix) throws IOException, InstantiationException, IllegalAccessException {
+  public int getDimensions(Path matrix) throws IOException {
 
     SequenceFile.Reader reader = null;
     try {
       reader = new SequenceFile.Reader(FileSystem.get(getConf()), matrix, getConf());
 
-      Writable row = (Writable) reader.getKeyClass().newInstance();
+      Writable row = ClassUtils.instantiateAs(reader.getKeyClass().asSubclass(Writable.class), Writable.class);
+
       VectorWritable vectorWritable = new VectorWritable();
 
       Preconditions.checkArgument(reader.getValueClass().equals(VectorWritable.class),
@@ -453,7 +450,8 @@ public abstract class AbstractJob extend
     }
   }
 
-  /** Obtain input and output directories from command-line options or hadoop
+  /**
+   * Obtain input and output directories from command-line options or hadoop
    *  properties. If {@code addInputOption} or {@code addOutputOption}
    *  has been called, this method will throw an {@code OptionException} if
    *  no source (command-line or property) for that value is present. 
@@ -461,7 +459,6 @@ public abstract class AbstractJob extend
    *  non-null only if specified as a hadoop property. Command-line options
    *  take precedence over hadoop properties.
    *
-   * @param cmdLine
    * @throws IllegalArgumentException if either inputOption is present,
    *   and neither {@code --input} nor {@code -Dmapred.input.dir} are
    *   specified or outputOption is present and neither {@code --output}
@@ -487,9 +484,9 @@ public abstract class AbstractJob extend
       this.outputPath = new Path(conf.get("mapred.output.dir"));
     }
 
-    Preconditions.checkArgument(inputOptional == true || inputOption == null || inputPath != null,
+    Preconditions.checkArgument(inputOptional || inputOption == null || inputPath != null,
         "No input specified or -Dmapred.input.dir must be provided to specify input directory");
-    Preconditions.checkArgument(outputOptional == true || outputOption == null || outputPath != null,
+    Preconditions.checkArgument(outputOptional || outputOption == null || outputPath != null,
         "No output specified:  or -Dmapred.output.dir must be provided to specify output directory");
   }
 
@@ -498,11 +495,12 @@ public abstract class AbstractJob extend
 
       // the option appeared on the command-line, or it has a value
       // (which is likely a default value). 
-      if (cmdLine.hasOption(o) || cmdLine.getValue(o) != null || (cmdLine.getValues(o) != null && cmdLine.getValues(o).isEmpty() == false)) {
+      if (cmdLine.hasOption(o) || cmdLine.getValue(o) != null ||
+          (cmdLine.getValues(o) != null && !cmdLine.getValues(o).isEmpty())) {
 
         // nulls are ok, for cases where options are simple flags.
-        List vo = cmdLine.getValues(o);
-        if (vo != null && vo.isEmpty() == false) {
+        List<?> vo = cmdLine.getValues(o);
+        if (vo != null && !vo.isEmpty()) {
           List<String> vals = new ArrayList<String>();
           for (Object o1 : vo) {
             vals.add(o1.toString());
@@ -523,7 +521,7 @@ public abstract class AbstractJob extend
    */
   public static String getOption(Map<String, List<String>> args, String optName) {
     List<String> res = args.get(optName);
-    if (res != null && res.isEmpty() == false) {
+    if (res != null && !res.isEmpty()) {
       return res.get(0);
     }
     return null;
@@ -581,8 +579,8 @@ public abstract class AbstractJob extend
   }
 
   /**
-   * necessary to make this job (having a combined input path) work on Amazon S3, hopefully this is obsolete when MultipleInputs is available
-   * again
+   * necessary to make this job (having a combined input path) work on Amazon S3, hopefully this is
+   * obsolete when MultipleInputs is available again
    */
   public static void setS3SafeCombinedInputPath(Job job, Path referencePath, Path inputPathOne, Path inputPathTwo)
       throws IOException {

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/common/CommandLineUtil.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/common/CommandLineUtil.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/common/CommandLineUtil.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/common/CommandLineUtil.java Wed Jun 20 12:07:50 2012
@@ -18,8 +18,10 @@
 package org.apache.mahout.common;
 
 import java.io.IOException;
+import java.io.OutputStreamWriter;
 import java.io.PrintWriter;
 
+import com.google.common.base.Charsets;
 import org.apache.commons.cli.Options;
 import org.apache.commons.cli2.Group;
 import org.apache.commons.cli2.OptionException;
@@ -51,7 +53,7 @@ public final class CommandLineUtil {
     fmt.printHelp("<command> [Generic Options] [Job-Specific Options]", 
         "Generic Options:", ops, "");
     
-    PrintWriter pw = new PrintWriter(System.out, true);
+    PrintWriter pw = new PrintWriter(new OutputStreamWriter(System.out, Charsets.UTF_8), true);
     HelpFormatter formatter = new HelpFormatter();
     formatter.setGroup(group);
     formatter.setPrintWriter(pw);
@@ -69,7 +71,7 @@ public final class CommandLineUtil {
     fmt.printHelp("<command> [Generic Options] [Job-Specific Options]",
         "Generic Options:", ops, "");
 
-    PrintWriter pw = new PrintWriter(System.out, true);
+    PrintWriter pw = new PrintWriter(new OutputStreamWriter(System.out, Charsets.UTF_8), true);
     HelpFormatter formatter = new HelpFormatter();
     formatter.setGroup(group);
     formatter.setPrintWriter(pw);

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/common/StringUtils.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/common/StringUtils.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/common/StringUtils.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/common/StringUtils.java Wed Jun 20 12:07:50 2012
@@ -29,7 +29,8 @@ public final class StringUtils {
   
   private static final XStream XSTREAM = new XStream();
   private static final Pattern NEWLINE_PATTERN = Pattern.compile("\n");
-  
+  private static final Pattern XMLRESERVED = Pattern.compile("\"|\\&|\\<|\\>|\'");
+
   private StringUtils() {
   // do nothing
   }
@@ -56,7 +57,7 @@ public final class StringUtils {
     return (T) XSTREAM.fromXML(str);
   }
 
-  public static String escapeXML(String input) {
-    return input.replaceAll("\"|\\&|\\<|\\>|\'", "_");
+  public static String escapeXML(CharSequence input) {
+    return XMLRESERVED.matcher(input).replaceAll("_");
   }
 }

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/common/distance/MahalanobisDistanceMeasure.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/common/distance/MahalanobisDistanceMeasure.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/common/distance/MahalanobisDistanceMeasure.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/common/distance/MahalanobisDistanceMeasure.java Wed Jun 20 12:07:50 2012
@@ -147,8 +147,6 @@ public class MahalanobisDistanceMeasure 
     if (v1.size() != v2.size()) {
       throw new CardinalityException(v1.size(), v2.size());
     }
-    if (inverseCovarianceMatrix== null)
-      System.out.println();
     return Math.sqrt(v1.minus(v2).dot(Algebra.mult(inverseCovarianceMatrix, v1.minus(v2))));
   }
 

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/driver/MahoutDriver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/driver/MahoutDriver.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/driver/MahoutDriver.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/driver/MahoutDriver.java Wed Jun 20 12:07:50 2012
@@ -200,7 +200,7 @@ public final class MahoutDriver {
   }
 
   private static boolean isDeprecated(Properties mainClasses, String keyString) {
-    return shortName(mainClasses.getProperty(keyString)).equalsIgnoreCase("deprecated");
+    return "deprecated".equalsIgnoreCase(shortName(mainClasses.getProperty(keyString)));
   }
 
   private static Properties loadProperties(String resource) throws IOException {

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/PFPGrowth.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/PFPGrowth.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/PFPGrowth.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/PFPGrowth.java Wed Jun 20 12:07:50 2012
@@ -50,8 +50,6 @@ import org.apache.mahout.common.iterator
 import org.apache.mahout.fpm.pfpgrowth.convertors.string.TopKStringPatterns;
 import org.apache.mahout.fpm.pfpgrowth.fpgrowth.FPGrowth;
 import org.apache.mahout.math.list.IntArrayList;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
 
 /**
  * 
@@ -95,7 +93,7 @@ public final class PFPGrowth {
       throw new IOException("Cannot read Frequency list from Distributed Cache");
     }
     if (files.length != 1) {
-      throw new IOException("Cannot read Frequency list from Distributed Cache (" + files.length + ")");
+      throw new IOException("Cannot read Frequency list from Distributed Cache (" + files.length + ')');
     }
     FileSystem fs = FileSystem.getLocal(conf);
     Path fListLocalPath = fs.makeQualified(files[0]);
@@ -106,7 +104,7 @@ public final class PFPGrowth {
         throw new IOException("Cannot read Frequency list from Distributed Cache");
       }
       if (filesURIs.length != 1) {
-        throw new IOException("Cannot read Frequency list from Distributed Cache (" + files.length + ")");
+        throw new IOException("Cannot read Frequency list from Distributed Cache (" + files.length + ')');
       }
       fListLocalPath = new Path(filesURIs[0].getPath());
     }
@@ -187,8 +185,9 @@ public final class PFPGrowth {
     IntArrayList ret = new IntArrayList();
     int start = groupId * maxPerGroup;
     int end = start + maxPerGroup;
-    if (end > numFeatures) 
+    if (end > numFeatures) {
       end = numFeatures;
+    }
     for (int i = start; i < end; i++) {
       ret.add(i);
     }
@@ -234,13 +233,12 @@ public final class PFPGrowth {
     saveFList(fList, params, conf);
 
     // set param to control group size in MR jobs
-    int numGroups = params.getInt(PFPGrowth.NUM_GROUPS, 
-                                  PFPGrowth.NUM_GROUPS_DEFAULT);
+    int numGroups = params.getInt(NUM_GROUPS, NUM_GROUPS_DEFAULT);
     int maxPerGroup = fList.size() / numGroups;
-    if (fList.size() % numGroups != 0) 
+    if (fList.size() % numGroups != 0) {
       maxPerGroup++;
+    }
     params.set(MAX_PER_GROUP, Integer.toString(maxPerGroup));
-    fList = null;
 
     startParallelFPGrowth(params, conf);
     startAggregating(params, conf);

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/ParallelFPGrowthMapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/ParallelFPGrowthMapper.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/ParallelFPGrowthMapper.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/ParallelFPGrowthMapper.java Wed Jun 20 12:07:50 2012
@@ -41,8 +41,7 @@ public class ParallelFPGrowthMapper exte
   private final OpenObjectIntHashMap<String> fMap = new OpenObjectIntHashMap<String>();
   private Pattern splitter;
   private int maxPerGroup;
-
-  private IntWritable wGroupID = new IntWritable();
+  private final IntWritable wGroupID = new IntWritable();
 
   @Override
   protected void map(LongWritable offset, Text input, Context context)
@@ -95,6 +94,6 @@ public class ParallelFPGrowthMapper exte
     splitter = Pattern.compile(params.get(PFPGrowth.SPLIT_PATTERN,
                                           PFPGrowth.SPLITTER.toString()));
     
-    maxPerGroup = Integer.valueOf(params.getInt(PFPGrowth.MAX_PER_GROUP, 0));
+    maxPerGroup = params.getInt(PFPGrowth.MAX_PER_GROUP, 0);
   }
 }

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/ParallelFPGrowthReducer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/ParallelFPGrowthReducer.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/ParallelFPGrowthReducer.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/ParallelFPGrowthReducer.java Wed Jun 20 12:07:50 2012
@@ -36,6 +36,7 @@ import org.apache.mahout.fpm.pfpgrowth.c
 import org.apache.mahout.fpm.pfpgrowth.convertors.integer.IntegerStringOutputConverter;
 import org.apache.mahout.fpm.pfpgrowth.convertors.string.TopKStringPatterns;
 import org.apache.mahout.fpm.pfpgrowth.fpgrowth.FPGrowth;
+import org.apache.mahout.fpm.pfpgrowth.fpgrowth2.FPGrowthIds;
 import org.apache.mahout.math.list.IntArrayList;
 import org.apache.mahout.math.list.LongArrayList;
 
@@ -44,22 +45,18 @@ import org.apache.mahout.math.list.LongA
  * outputs the the Top K frequent Patterns for each group.
  * 
  */
-public class ParallelFPGrowthReducer extends Reducer<IntWritable,TransactionTree,Text,TopKStringPatterns> {
+public final class ParallelFPGrowthReducer extends Reducer<IntWritable,TransactionTree,Text,TopKStringPatterns> {
 
   private final List<String> featureReverseMap = Lists.newArrayList();
   private final LongArrayList freqList = new LongArrayList();
-  
   private int maxHeapSize = 50;
-  
   private int minSupport = 3;
-
   private int numFeatures;
   private int maxPerGroup;
-
   private boolean useFP2;
 
   private static class IteratorAdapter implements Iterator<Pair<List<Integer>,Long>> {
-    private Iterator<Pair<IntArrayList,Long>> innerIter;
+    private final Iterator<Pair<IntArrayList,Long>> innerIter;
 
     private IteratorAdapter(Iterator<Pair<IntArrayList,Long>> transactionIter) {
       innerIter = transactionIter;
@@ -73,7 +70,7 @@ public class ParallelFPGrowthReducer ext
     @Override
     public Pair<List<Integer>,Long> next() {
       Pair<IntArrayList,Long> innerNext = innerIter.next();
-      return new Pair(innerNext.getFirst().toList(), innerNext.getSecond());
+      return new Pair<List<Integer>,Long>(innerNext.getFirst().toList(), innerNext.getSecond());
     }
 
     @Override
@@ -99,18 +96,16 @@ public class ParallelFPGrowthReducer ext
     Collections.sort(localFList, new CountDescendingPairComparator<Integer,Long>());
     
     if (useFP2) {
-      org.apache.mahout.fpm.pfpgrowth.fpgrowth2.FPGrowthIds fpGrowth = 
-        new org.apache.mahout.fpm.pfpgrowth.fpgrowth2.FPGrowthIds();
-      fpGrowth.generateTopKFrequentPatterns(
+      FPGrowthIds.generateTopKFrequentPatterns(
           cTree.iterator(),
           freqList,
           minSupport,
           maxHeapSize,
           PFPGrowth.getGroupMembers(key.get(), maxPerGroup, numFeatures),
           new IntegerStringOutputConverter(
-              new ContextWriteOutputCollector<IntWritable,TransactionTree,Text,TopKStringPatterns>(context),
+              new ContextWriteOutputCollector<IntWritable, TransactionTree, Text, TopKStringPatterns>(context),
               featureReverseMap),
-          new ContextStatusUpdater<IntWritable,TransactionTree,Text,TopKStringPatterns>(context));
+          new ContextStatusUpdater<IntWritable, TransactionTree, Text, TopKStringPatterns>(context));
     } else {
       FPGrowth<Integer> fpGrowth = new FPGrowth<Integer>();
       fpGrowth.generateTopKFrequentPatterns(

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/convertors/TransactionIterator.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/convertors/TransactionIterator.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/convertors/TransactionIterator.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/convertors/TransactionIterator.java Wed Jun 20 12:07:50 2012
@@ -42,10 +42,10 @@ public class TransactionIterator<T> exte
         new Function<Pair<List<T>,Long>, Pair<int[],Long>>() {
           @Override
           public Pair<int[],Long> apply(Pair<List<T>,Long> from) {
-            int index = 0;
-	    if (from == null) {
+            if (from == null) {
 	      return null;
 	    }
+            int index = 0;
             for (T attribute : from.getFirst()) {
               if (attributeIdMapping.containsKey(attribute)) {
                 transactionBuffer[index++] = attributeIdMapping.get(attribute);

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/FrequentPatternMaxHeap.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/FrequentPatternMaxHeap.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/FrequentPatternMaxHeap.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/FrequentPatternMaxHeap.java Wed Jun 20 12:07:50 2012
@@ -163,7 +163,8 @@ public final class FrequentPatternMaxHea
     return true;
   }
 
-  public String toString() { 
+  @Override
+  public String toString() {
     StringBuilder sb = new StringBuilder("FreqPatHeap{");
     String sep = "";
     for (Pattern p : getHeap()) {

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth2/FPGrowthIds.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth2/FPGrowthIds.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth2/FPGrowthIds.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth2/FPGrowthIds.java Wed Jun 20 12:07:50 2012
@@ -48,10 +48,13 @@ import  org.apache.mahout.fpm.pfpgrowth.
 /**
  * Implementation of PFGrowth Algorithm
  */
-public class FPGrowthIds {
+public final class FPGrowthIds {
 
   private static final Logger log = LoggerFactory.getLogger(FPGrowthIds.class);
 
+  private FPGrowthIds() {
+  }
+
   public static List<Pair<String,TopKStringPatterns>> readFrequentPattern(Configuration conf, Path path) {
     List<Pair<String,TopKStringPatterns>> ret = Lists.newArrayList();
     // key is feature value is count
@@ -68,7 +71,7 @@ public class FPGrowthIds {
    *
    * @param transactionStream
    *          Iterator of transaction
-   * @param frequencyList
+   * @param attributeFrequency
    *          list of frequent features and their support value
    * @param minSupport
    *          minimum support of the transactions
@@ -83,13 +86,13 @@ public class FPGrowthIds {
    *          written
    * @throws IOException
    */
-  public final void generateTopKFrequentPatterns(Iterator<Pair<IntArrayList,Long>> transactionStream,
-                                                 LongArrayList attributeFrequency,
-                                                 long minSupport,
-                                                 int k,
-                                                 IntArrayList returnableFeatures,
-                                                 OutputCollector<Integer,List<Pair<List<Integer>,Long>>> output,
-                                                 StatusUpdater updater) throws IOException {
+  public static void generateTopKFrequentPatterns(Iterator<Pair<IntArrayList, Long>> transactionStream,
+                                                  LongArrayList attributeFrequency,
+                                                  long minSupport,
+                                                  int k,
+                                                  IntArrayList returnableFeatures,
+                                                  OutputCollector<Integer, List<Pair<List<Integer>, Long>>> output,
+                                                  StatusUpdater updater) throws IOException {
 
     for (int i = 0; i < attributeFrequency.size(); i++) {
       if (attributeFrequency.get(i) < minSupport) {
@@ -116,10 +119,12 @@ public class FPGrowthIds {
 
   private static class IdentityMapping extends AbstractMap<Integer, Integer> {
 
+    @Override
     public Set<Map.Entry<Integer,Integer>> entrySet() {
       throw new IllegalStateException();
     }
 
+    @Override
     public Integer get(Object key) {
       return (Integer) key;
     }
@@ -142,12 +147,12 @@ public class FPGrowthIds {
    *          integer to A
    * @return Top K Frequent Patterns for each feature and their support
    */
-  private Map<Integer,FrequentPatternMaxHeap> fpGrowth(FPTree tree,
-                                                       long minSupportValue,
-                                                       int k,
-                                                       IntArrayList requiredFeatures,
-                                                       TopKPatternsOutputConverter<Integer> outputCollector,
-                                                       StatusUpdater updater) throws IOException {
+  private static Map<Integer,FrequentPatternMaxHeap> fpGrowth(FPTree tree,
+                                                              long minSupportValue,
+                                                              int k,
+                                                              IntArrayList requiredFeatures,
+                                                              TopKPatternsOutputConverter<Integer> outputCollector,
+                                                              StatusUpdater updater) throws IOException {
 
     Map<Integer,FrequentPatternMaxHeap> patterns = Maps.newHashMap();
     requiredFeatures.sort();
@@ -182,8 +187,6 @@ public class FPGrowthIds {
    *          minimum support of the pattern to be mined
    * @param k
    *          Max value of the Size of the Max-Heap in which Patterns are held
-   * @param featureSetSize
-   *          number of features
    * @param returnFeatures
    *          the id's of the features for which Top K patterns have to be mined
    * @param topKPatternsOutputCollector
@@ -191,19 +194,18 @@ public class FPGrowthIds {
    *          format to the corresponding A Format
    * @return Top K frequent patterns for each attribute
    */
-  private Map<Integer,FrequentPatternMaxHeap> generateTopKFrequentPatterns(
-    Iterator<Pair<IntArrayList,Long>> transactions,
-    LongArrayList attributeFrequency,
-    long minSupport,
-    int k,
-    IntArrayList returnFeatures, 
-    TopKPatternsOutputConverter<Integer> topKPatternsOutputCollector,
-    StatusUpdater updater) throws IOException {
+  private static Map<Integer,FrequentPatternMaxHeap> generateTopKFrequentPatterns(
+      Iterator<Pair<IntArrayList, Long>> transactions,
+      LongArrayList attributeFrequency,
+      long minSupport,
+      int k,
+      IntArrayList returnFeatures,
+      TopKPatternsOutputConverter<Integer> topKPatternsOutputCollector,
+      StatusUpdater updater) throws IOException {
 
     FPTree tree = new FPTree(attributeFrequency, minSupport);
 
     // Constructing initial FPTree from the list of transactions
-    int nodecount = 0;
     int i = 0;
     while (transactions.hasNext()) {
       Pair<IntArrayList,Long> transaction = transactions.next();
@@ -215,8 +217,6 @@ public class FPGrowthIds {
       }
     }
 
-    log.info("Number of Nodes in the FP Tree: {}", nodecount);
-
     return fpGrowth(tree, minSupport, k, returnFeatures, topKPatternsOutputCollector, updater);
   }
 
@@ -282,10 +282,12 @@ public class FPGrowthIds {
         int[] qints = q.getPattern();
         
         Pattern pq = new Pattern();
-        for (int pi = 0; pi < p.length(); pi++) 
+        for (int pi = 0; pi < p.length(); pi++) {
           pq.add(pints[pi], p.support());
-        for (int qi = 0; qi < q.length(); qi++) 
+        }
+        for (int qi = 0; qi < q.length(); qi++) {
           pq.add(qints[qi], q.support());
+        }
         pats.insert(pq);
       }
     }
@@ -293,8 +295,9 @@ public class FPGrowthIds {
     for (Pattern q : qPats.getHeap()) {
       Pattern qq = new Pattern();
       int[] qints = q.getPattern();
-      for (int qi = 0; qi < q.length(); qi++) 
+      for (int qi = 0; qi < q.length(); qi++) {
         qq.add(qints[qi], q.support());
+      }
       pats.insert(qq);
     }
 

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth2/FPGrowthObj.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth2/FPGrowthObj.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth2/FPGrowthObj.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth2/FPGrowthObj.java Wed Jun 20 12:07:50 2012
@@ -220,8 +220,6 @@ public class FPGrowthObj<A extends Compa
     return patterns;
   }
 
-      
-
   /**
    * Internal TopKFrequentPattern Generation algorithm, which represents the A's
    * as integers and transforms features to use only integers
@@ -234,8 +232,6 @@ public class FPGrowthObj<A extends Compa
    *          minimum support of the pattern to be mined
    * @param k
    *          Max value of the Size of the Max-Heap in which Patterns are held
-   * @param featureSetSize
-   *          number of features
    * @param returnFeatures
    *          the id's of the features for which Top K patterns have to be mined
    * @param topKPatternsOutputCollector
@@ -248,21 +244,20 @@ public class FPGrowthObj<A extends Compa
     long[] attributeFrequency,
     long minSupport,
     int k,
-    //int featureSetSize,
     Collection<Integer> returnFeatures, TopKPatternsOutputConverter<A> topKPatternsOutputCollector,
     StatusUpdater updater) throws IOException {
 
     FPTree tree = new FPTree(attributeFrequency, minSupport);
 
     // Constructing initial FPTree from the list of transactions
-    int nodecount = 0;
     int i = 0;
     while (transactions.hasNext()) {
       Pair<int[],Long> transaction = transactions.next();
       List<Integer> iLst = Lists.newArrayList();
       int[] iArr = transaction.getFirst();
-      for (int j = 0; j < iArr.length; j++)
-        iLst.add(iArr[j]);
+      for (int anIArr : iArr) {
+        iLst.add(anIArr);
+      }
       tree.accumulate(iLst, transaction.getSecond());
       i++;
       if (i % 10000 == 0) {
@@ -270,8 +265,6 @@ public class FPGrowthObj<A extends Compa
       }
     }
 
-    log.info("Number of Nodes in the FP Tree: {}", nodecount);
-
     return fpGrowth(tree, minSupport, k, returnFeatures, topKPatternsOutputCollector, updater);
   }
 
@@ -337,10 +330,12 @@ public class FPGrowthObj<A extends Compa
         int[] qints = q.getPattern();
         
         Pattern pq = new Pattern();
-        for (int pi = 0; pi < p.length(); pi++) 
+        for (int pi = 0; pi < p.length(); pi++) {
           pq.add(pints[pi], p.support());
-        for (int qi = 0; qi < q.length(); qi++) 
+        }
+        for (int qi = 0; qi < q.length(); qi++) {
           pq.add(qints[qi], q.support());
+        }
         pats.insert(pq);
       }
     }
@@ -348,8 +343,9 @@ public class FPGrowthObj<A extends Compa
     for (Pattern q : qPats.getHeap()) {
       Pattern qq = new Pattern();
       int[] qints = q.getPattern();
-      for (int qi = 0; qi < q.length(); qi++) 
+      for (int qi = 0; qi < q.length(); qi++) {
         qq.add(qints[qi], q.support());
+      }
       pats.insert(qq);
     }
 

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth2/FPTree.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth2/FPTree.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth2/FPTree.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth2/FPTree.java Wed Jun 20 12:07:50 2012
@@ -33,22 +33,22 @@ import org.apache.mahout.math.map.OpenIn
 public class FPTree {
 
   private final AttrComparator attrComparator = new AttrComparator();
-  private FPNode root;
-  private long minSupport;
-  private LongArrayList attrCountList;
-  private OpenIntObjectHashMap attrNodeLists; 
+  private final FPNode root;
+  private final long minSupport;
+  private final LongArrayList attrCountList;
+  private final OpenIntObjectHashMap<List<FPNode>> attrNodeLists;
 
   public static final class FPNode {
-    private FPNode parent;
-    private OpenIntObjectHashMap childMap;
-    private int attribute;
+    private final FPNode parent;
+    private final OpenIntObjectHashMap<FPNode> childMap;
+    private final int attribute;
     private long count;
 
     private FPNode(FPNode parent, int attribute, long count) {
       this.parent = parent;
       this.attribute = attribute;
       this.count = count;
-      this.childMap = new OpenIntObjectHashMap();
+      this.childMap = new OpenIntObjectHashMap<FPNode>();
     }
 
     private void addChild(FPNode child) {
@@ -68,7 +68,7 @@ public class FPTree {
     }
 
     public FPNode child(int attribute) {
-      return (FPNode) childMap.get(attribute);
+      return childMap.get(attribute);
     }
 
     public int attribute() {
@@ -76,7 +76,7 @@ public class FPTree {
     }
 
     public void accumulate(long incr) {
-      count = count + incr;
+      count += incr;
     }
 
     public long count() {
@@ -94,7 +94,7 @@ public class FPTree {
   public FPTree(LongArrayList attrCountList, long minSupport) {
     this.root = new FPNode(null, -1, 0);
     this.attrCountList = attrCountList;
-    this.attrNodeLists = new OpenIntObjectHashMap();
+    this.attrNodeLists = new OpenIntObjectHashMap<List<FPNode>>();
     this.minSupport = minSupport;
   }
 
@@ -107,14 +107,15 @@ public class FPTree {
   public FPTree(long[] attrCounts, long minSupport) {
     this.root = new FPNode(null, -1, 0);
     this.attrCountList = new LongArrayList();
-    for (int i = 0; i < attrCounts.length; i++) 
+    for (int i = 0; i < attrCounts.length; i++) {
       if (attrCounts[i] > 0) {
         if (attrCountList.size() < (i + 1)) {
           attrCountList.setSize(i + 1);
         }
         attrCountList.set(i, attrCounts[i]);
       }
-    this.attrNodeLists = new OpenIntObjectHashMap();
+    }
+    this.attrNodeLists = new OpenIntObjectHashMap<List<FPNode>>();
     this.minSupport = minSupport;
   }
 
@@ -145,13 +146,14 @@ public class FPTree {
     Collections.sort(items, attrComparator);
     
     FPNode currNode = root;
-    for (int i = 0; i < items.size(); i++) {
-      int item = items.get(i);
+    for (Integer item : items) {
       long attrCount = 0;
-      if (item < attrCountList.size())
+      if (item < attrCountList.size()) {
         attrCount = attrCountList.get(item);
-      if (attrCount < minSupport)
+      }
+      if (attrCount < minSupport) {
         continue;
+      }
 
       FPNode next = currNode.child(item);
       if (next == null) {
@@ -179,17 +181,17 @@ public class FPTree {
     Collections.sort(items, attrComparator);
     
     FPNode currNode = root;
-    for (int i = 0; i < items.size(); i++) {
-      int item = items.get(i);
+    for (Integer item : items) {
       long attrCount = attrCountList.get(item);
-      if (attrCount < minSupport)
+      if (attrCount < minSupport) {
         continue;
+      }
 
       FPNode next = currNode.child(item);
       if (next == null) {
         next = new FPNode(currNode, item, count);
         currNode.addChild(next);
-        List<FPNode> nodeList = (List<FPNode>) attrNodeLists.get(item);
+        List<FPNode> nodeList = attrNodeLists.get(item);
         if (nodeList == null) {
           nodeList = Lists.newArrayList();
           attrNodeLists.put(item, nodeList);
@@ -210,8 +212,9 @@ public class FPTree {
   public Iterable<Integer> attrIterable() {
     List<Integer> attrs = Lists.newArrayList();
     for (int i = 0; i < attrCountList.size(); i++) {
-      if (attrCountList.get(i) > 0)
+      if (attrCountList.get(i) > 0) {
         attrs.add(i);
+      }
     }
     Collections.sort(attrs, attrComparator);
     return attrs;
@@ -224,8 +227,9 @@ public class FPTree {
   public Iterable<Integer> attrIterableRev() {
     List<Integer> attrs = Lists.newArrayList();
     for (int i = 0; i < attrCountList.size(); i++) {
-      if (attrCountList.get(i) > 0)
+      if (attrCountList.get(i) > 0) {
         attrs.add(i);
+      }
     }
     Collections.sort(attrs, Collections.reverseOrder(attrComparator));
     return attrs;
@@ -237,7 +241,7 @@ public class FPTree {
    */
   public FPTree createMoreFreqConditionalTree(int targetAttr) {
     LongArrayList counts = new LongArrayList();
-    List<FPNode> nodeList = (List<FPNode>) attrNodeLists.get(targetAttr);
+    List<FPNode> nodeList = attrNodeLists.get(targetAttr);
 
     for (FPNode currNode : nodeList) {
       long pathCount = currNode.count();
@@ -251,21 +255,23 @@ public class FPTree {
         currNode = currNode.parent();
       }
     }
-    if (counts.get(targetAttr) != attrCountList.get(targetAttr))
+    if (counts.get(targetAttr) != attrCountList.get(targetAttr)) {
       throw new IllegalStateException("mismatched counts for targetAttr="
-                                      + targetAttr + ", (" + counts.get(targetAttr)
-                                      + " != " + attrCountList.get(targetAttr) + "); "
-                                      + "thisTree=" + this + "\n");
+                                          + targetAttr + ", (" + counts.get(targetAttr)
+                                          + " != " + attrCountList.get(targetAttr) + "); "
+                                          + "thisTree=" + this + '\n');
+    }
     counts.set(targetAttr, 0L);
 
     FPTree toRet = new FPTree(counts, minSupport);
     IntArrayList attrLst = new IntArrayList();
-    for (FPNode currNode : (List<FPNode>) attrNodeLists.get(targetAttr)) {
+    for (FPNode currNode : attrNodeLists.get(targetAttr)) {
       long count = currNode.count();
       attrLst.clear();
       while (currNode != root) {
-        if (currNode.count() < count) 
+        if (currNode.count() < count) {
           throw new IllegalStateException();
+        }
         attrLst.add(currNode.attribute());
         currNode = currNode.parent();
       }
@@ -277,16 +283,20 @@ public class FPTree {
 
   // biggest count or smallest attr number goes first
   private class AttrComparator implements Comparator<Integer> {
+    @Override
     public int compare(Integer a, Integer b) {
 
       long aCnt = 0;
-      if (a < attrCountList.size())
+      if (a < attrCountList.size()) {
         aCnt = attrCountList.get(a);
+      }
       long bCnt = 0;
-      if (b < attrCountList.size())
+      if (b < attrCountList.size()) {
         bCnt = attrCountList.get(b);
-      if (aCnt == bCnt)
+      }
+      if (aCnt == bCnt) {
         return a - b;
+      }
       return (bCnt - aCnt) < 0 ? -1 : 1;
     }
   }
@@ -305,8 +315,9 @@ public class FPTree {
     FPNode currNode = root;
     while (currNode.numChildren() == 1) {
       currNode = currNode.children().iterator().next();
-      if (pAttrCountList.size() <= currNode.attribute())
+      if (pAttrCountList.size() <= currNode.attribute()) {
         pAttrCountList.setSize(currNode.attribute() + 1);
+      }
       pAttrCountList.set(currNode.attribute(), currNode.count());
       qAttrCountList.set(currNode.attribute(), 0);
     }
@@ -320,17 +331,18 @@ public class FPTree {
 
   private long recursivelyAddPrefixPats(FPTree pTree, FPTree qTree, FPNode node,
                                         IntArrayList items) {
-    long added = 0;
     long count = node.count();
     int attribute = node.attribute();
     if (items == null) {
       // at root
-      if (!(node == root))
+      if (!(node == root)) {
         throw new IllegalStateException();
+      }
       items = new IntArrayList();
     } else {
       items.add(attribute);
     }
+    long added = 0;
     for (FPNode child : node.children()) {
       added += recursivelyAddPrefixPats(pTree, qTree, child, items);
     }
@@ -350,7 +362,7 @@ public class FPTree {
     return added;
   }
 
-  private void toStringHelper(StringBuilder sb, FPNode currNode, String prefix) {
+  private static void toStringHelper(StringBuilder sb, FPNode currNode, String prefix) {
     if (currNode.numChildren() == 0) {
       sb.append(prefix).append("-{attr:").append(currNode.attribute())
         .append(", cnt:").append(currNode.count()).append("}\n");
@@ -363,15 +375,17 @@ public class FPTree {
         fakePre.append(' ');
       }
       int i = 0;
-      for (FPNode child : currNode.children()) 
+      for (FPNode child : currNode.children()) {
         toStringHelper(sb, child, (i++ == 0 ? newPre : fakePre).toString() + '-' + i + "->");
+      }
     }
   }
 
+  @Override
   public String toString() {
     StringBuilder sb = new StringBuilder("[FPTree\n");
     toStringHelper(sb, root, "  ");
-    sb.append("]");
+    sb.append(']');
     return sb.toString();
   }
 

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/DistributedRowMatrix.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/DistributedRowMatrix.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/DistributedRowMatrix.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/DistributedRowMatrix.java Wed Jun 20 12:07:50 2012
@@ -20,7 +20,6 @@ package org.apache.mahout.math.hadoop;
 import java.io.DataInput;
 import java.io.DataOutput;
 import java.io.IOException;
-import java.lang.reflect.InvocationTargetException;
 import java.util.Iterator;
 
 import org.apache.hadoop.conf.Configurable;
@@ -192,9 +191,7 @@ public class DistributedRowMatrix implem
     return out;
   }
 
-  public Vector columnMeans() throws IOException, InterruptedException, ClassNotFoundException,
-  IllegalArgumentException, SecurityException, InstantiationException, IllegalAccessException,
-  InvocationTargetException, NoSuchMethodException {
+  public Vector columnMeans() throws IOException {
     return columnMeans("SequentialAccessSparseVector");
   }
 
@@ -206,18 +203,13 @@ public class DistributedRowMatrix implem
    *          RandomAccessSparseVector, DenseVector
    * @return Vector containing the column-wise mean of this
    */
-  public Vector columnMeans(String vectorClass) throws IOException,
-      InterruptedException, IllegalArgumentException, SecurityException,
-      ClassNotFoundException, InstantiationException, IllegalAccessException,
-      InvocationTargetException, NoSuchMethodException {
+  public Vector columnMeans(String vectorClass) throws IOException {
     Path outputVectorTmpPath =
         new Path(outputTmpBasePath, new Path(Long.toString(System.nanoTime())));
     Configuration initialConf =
         getConf() == null ? new Configuration() : getConf();
     String vectorClassFull = "org.apache.mahout.math." + vectorClass;
-    Vector mean =
-        MatrixColumnMeansJob.run(initialConf, rowPath, outputVectorTmpPath,
-            vectorClassFull);
+    Vector mean = MatrixColumnMeansJob.run(initialConf, rowPath, outputVectorTmpPath, vectorClassFull);
     if (!keepTempFiles) {
       FileSystem fs = outputVectorTmpPath.getFileSystem(conf);
       fs.delete(outputVectorTmpPath, true);

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/MatrixColumnMeansJob.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/MatrixColumnMeansJob.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/MatrixColumnMeansJob.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/MatrixColumnMeansJob.java Wed Jun 20 12:07:50 2012
@@ -31,6 +31,7 @@ import org.apache.hadoop.mapreduce.lib.i
 import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
 import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
 import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
+import org.apache.mahout.common.ClassUtils;
 import org.apache.mahout.common.iterator.sequencefile.SequenceFileValueIterator;
 import org.apache.mahout.math.DenseVector;
 import org.apache.mahout.math.Vector;
@@ -44,15 +45,18 @@ import com.google.common.io.Closeables;
  * DistributedRowMatrix. This job can be accessed using
  * DistributedRowMatrix.columnMeans()
  */
-public class MatrixColumnMeansJob {
+public final class MatrixColumnMeansJob {
 
   public static final String VECTOR_CLASS =
     "DistributedRowMatrix.columnMeans.vector.class";
 
+  private MatrixColumnMeansJob() {
+  }
+
   public static Vector run(Configuration conf,
                            Path inputPath,
                            Path outputVectorTmpPath) throws IOException {
-    return run(conf, inputPath, outputVectorTmpPath);
+    return run(conf, inputPath, outputVectorTmpPath, VECTOR_CLASS);
   }
 
   /**
@@ -61,7 +65,7 @@ public class MatrixColumnMeansJob {
    * @param initialConf
    * @param inputPath
    *          path to DistributedRowMatrix input
-   * @param tmpPath
+   * @param outputVectorTmpPath
    *          path for temporary files created during job
    * @param vectorClass
    *          String of desired class for returned vector e.g. DenseVector,
@@ -116,10 +120,11 @@ public class MatrixColumnMeansJob {
         Closeables.closeQuietly(iterator);
       }
     } catch (Throwable thr) {
-      if (thr instanceof IOException)
+      if (thr instanceof IOException) {
         throw (IOException) thr;
-      else
+      } else {
         throw new IOException(thr);
+      }
     }
   }
 
@@ -129,8 +134,8 @@ public class MatrixColumnMeansJob {
   public static class MatrixColumnMeansMapper extends
       Mapper<IntWritable, VectorWritable, NullWritable, VectorWritable> {
 
-    private Vector runningSum = null;
-    private String vectorClass = null;
+    private Vector runningSum;
+    private String vectorClass;
 
     @Override
     public void setup(Context context) {
@@ -147,17 +152,14 @@ public class MatrixColumnMeansJob {
     public void map(IntWritable r, VectorWritable v, Context context)
       throws IOException {
       if (runningSum == null) {
-        try {
           /*
            * If this is the first vector the mapper has seen, instantiate a new
            * vector using the parameter VECTOR_CLASS
            */
-          runningSum =
-            (Vector) Class.forName(vectorClass).getConstructor(int.class)
-                          .newInstance(v.get().size() + 1);
-        } catch (Exception e) {
-          e.printStackTrace();
-        }
+        runningSum = ClassUtils.instantiateAs(vectorClass,
+                                              Vector.class,
+                                              new Class<?>[] { int.class },
+                                              new Object[] { v.get().size() + 1 });
         runningSum.set(0, 1);
         runningSum.viewPart(1, v.get().size()).assign(v.get());
       } else {
@@ -188,9 +190,10 @@ public class MatrixColumnMeansJob {
   public static class MatrixColumnMeansReducer extends
       Reducer<NullWritable, VectorWritable, IntWritable, VectorWritable> {
 
-    private static final IntWritable one = new IntWritable(1);
-    private String vectorClass = null;
-    Vector outputVector = null;
+    private static final IntWritable ONE = new IntWritable(1);
+
+    private String vectorClass;
+    Vector outputVector;
     VectorWritable outputVectorWritable = new VectorWritable();
 
     @Override
@@ -201,8 +204,7 @@ public class MatrixColumnMeansJob {
     @Override
     public void reduce(NullWritable n,
                        Iterable<VectorWritable> vectors,
-                       Context context) throws IOException,
-      InterruptedException {
+                       Context context) throws IOException, InterruptedException {
 
       /**
        * Add together partial column-wise sums from mappers
@@ -223,16 +225,13 @@ public class MatrixColumnMeansJob {
         outputVectorWritable.set(outputVector.viewPart(1,
                                                        outputVector.size() - 1)
                                              .divide(outputVector.get(0)));
-        context.write(one, outputVectorWritable);
+        context.write(ONE, outputVectorWritable);
       } else {
-        try {
-          Vector emptyVector =
-            (Vector) Class.forName(vectorClass).getConstructor(int.class)
-                          .newInstance(0);
-          context.write(one, new VectorWritable(emptyVector));
-        } catch (Exception e) {
-          e.printStackTrace();
-        }
+        Vector emptyVector = ClassUtils.instantiateAs(vectorClass,
+                                                      Vector.class,
+                                                      new Class<?>[] { int.class },
+                                                      new Object[] { 0 });
+        context.write(ONE, new VectorWritable(emptyVector));
       }
     }
   }

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/TransposeJob.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/TransposeJob.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/TransposeJob.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/TransposeJob.java Wed Jun 20 12:07:50 2012
@@ -95,7 +95,6 @@ public class TransposeJob extends Abstra
     FileInputFormat.addInputPath(conf, matrixInputPath);
     conf.setInputFormat(SequenceFileInputFormat.class);
     FileOutputFormat.setOutputPath(conf, matrixOutputPath);
-    System.out.println("OUTPUT --> " + matrixOutputPath.toString());
     conf.setMapperClass(TransposeMapper.class);
     conf.setMapOutputKeyClass(IntWritable.class);
     conf.setMapOutputValueClass(VectorWritable.class);
@@ -136,8 +135,10 @@ public class TransposeJob extends Abstra
           implements Reducer<WritableComparable<?>, VectorWritable, WritableComparable<?>, VectorWritable> {
 
     @Override
-    public void reduce(WritableComparable<?> key, Iterator<VectorWritable> vectors,
-                       OutputCollector<WritableComparable<?>, VectorWritable> out, Reporter reporter) throws IOException {
+    public void reduce(WritableComparable<?> key,
+                       Iterator<VectorWritable> vectors,
+                       OutputCollector<WritableComparable<?>,VectorWritable> out,
+                       Reporter reporter) throws IOException {
       out.collect(key, VectorWritable.merge(vectors));
     }
   }
@@ -146,8 +147,10 @@ public class TransposeJob extends Abstra
           implements Reducer<WritableComparable<?>, VectorWritable, WritableComparable<?>, VectorWritable> {
 
     @Override
-    public void reduce(WritableComparable<?> key, Iterator<VectorWritable> vectors,
-                       OutputCollector<WritableComparable<?>, VectorWritable> out, Reporter reporter) throws IOException {
+    public void reduce(WritableComparable<?> key,
+                       Iterator<VectorWritable> vectors,
+                       OutputCollector<WritableComparable<?>, VectorWritable> out,
+                       Reporter reporter) throws IOException {
       Vector merged = VectorWritable.merge(vectors).get();
       out.collect(key, new VectorWritable(new SequentialAccessSparseVector(merged)));
     }

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/decomposer/DistributedLanczosSolver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/decomposer/DistributedLanczosSolver.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/decomposer/DistributedLanczosSolver.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/decomposer/DistributedLanczosSolver.java Wed Jun 20 12:07:50 2012
@@ -57,7 +57,7 @@ public class DistributedLanczosSolver ex
    * For the distributed case, the best guess at a useful initialization state for Lanczos we'll chose to be
    * uniform over all input dimensions, L_2 normalized.
    */
-  public Vector getInitialVector(VectorIterable corpus) {
+  public static Vector getInitialVector(VectorIterable corpus) {
     Vector initialVector = new DenseVector(corpus.numCols());
     initialVector.assign(1.0 / Math.sqrt(corpus.numCols()));
     return initialVector;

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/decomposer/EigenVerificationJob.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/decomposer/EigenVerificationJob.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/decomposer/EigenVerificationJob.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/decomposer/EigenVerificationJob.java Wed Jun 20 12:07:50 2012
@@ -30,7 +30,6 @@ import org.apache.hadoop.util.ToolRunner
 import org.apache.mahout.common.AbstractJob;
 import org.apache.mahout.common.commandline.DefaultOptionCreator;
 import org.apache.mahout.math.MatrixSlice;
-import org.apache.mahout.math.OrthonormalityVerifier;
 import org.apache.mahout.math.SparseRowMatrix;
 import org.apache.mahout.math.Vector;
 import org.apache.mahout.math.VectorIterable;
@@ -182,10 +181,6 @@ public class EigenVerificationJob extend
     return parseArguments(args);
   }
 
-  private VectorIterable computePairwiseInnerProducts() {
-    return OrthonormalityVerifier.pairwiseInnerProducts(eigensToVerify);
-  }
-
   private void saveCleanEigens(Configuration conf, Collection<Map.Entry<MatrixSlice, EigenStatus>> prunedEigenMeta)
     throws IOException {
     Path path = new Path(outPath, CLEAN_EIGENVECTORS);

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/decomposer/HdfsBackedLanczosState.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/decomposer/HdfsBackedLanczosState.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/decomposer/HdfsBackedLanczosState.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/decomposer/HdfsBackedLanczosState.java Wed Jun 20 12:07:50 2012
@@ -84,10 +84,8 @@ public class HdfsBackedLanczosState exte
   }
 
   private void createDirIfNotExist(Path path) throws IOException {
-    if (!fs.exists(path)) {
-      if (!fs.mkdirs(path)) {
-        throw new IOException("Unable to create: " + path);
-      }
+    if (!fs.exists(path) && !fs.mkdirs(path)) {
+      throw new IOException("Unable to create: " + path);
     }
   }
 

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/RowSimilarityJob.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/RowSimilarityJob.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/RowSimilarityJob.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/RowSimilarityJob.java Wed Jun 20 12:07:50 2012
@@ -201,7 +201,7 @@ public class RowSimilarityJob extends Ab
 
     @Override
     protected void map(IntWritable row, VectorWritable vectorWritable, Context ctx)
-        throws IOException, InterruptedException {
+      throws IOException, InterruptedException {
 
       Vector rowVector = similarity.normalize(vectorWritable.get());
 
@@ -243,7 +243,7 @@ public class RowSimilarityJob extends Ab
   public static class MergeVectorsCombiner extends Reducer<IntWritable,VectorWritable,IntWritable,VectorWritable> {
     @Override
     protected void reduce(IntWritable row, Iterable<VectorWritable> partialVectors, Context ctx)
-        throws IOException, InterruptedException {
+      throws IOException, InterruptedException {
       ctx.write(row, new VectorWritable(Vectors.merge(partialVectors)));
     }
   }
@@ -263,7 +263,7 @@ public class RowSimilarityJob extends Ab
 
     @Override
     protected void reduce(IntWritable row, Iterable<VectorWritable> partialVectors, Context ctx)
-        throws IOException, InterruptedException {
+      throws IOException, InterruptedException {
       Vector partialVector = Vectors.merge(partialVectors);
 
       if (row.get() == NORM_VECTOR_MARKER) {
@@ -316,7 +316,7 @@ public class RowSimilarityJob extends Ab
 
     @Override
     protected void map(IntWritable column, VectorWritable occurrenceVector, Context ctx)
-        throws IOException, InterruptedException {
+      throws IOException, InterruptedException {
       Vector.Element[] occurrences = Vectors.toArray(occurrenceVector);
       Arrays.sort(occurrences, BY_INDEX);
 
@@ -342,8 +342,7 @@ public class RowSimilarityJob extends Ab
   }
 
 
-  public static class SimilarityReducer
-      extends Reducer<IntWritable,VectorWritable,IntWritable,VectorWritable> {
+  public static class SimilarityReducer extends Reducer<IntWritable,VectorWritable,IntWritable,VectorWritable> {
 
     private VectorSimilarityMeasure similarity;
     private int numberOfColumns;
@@ -364,7 +363,7 @@ public class RowSimilarityJob extends Ab
 
     @Override
     protected void reduce(IntWritable row, Iterable<VectorWritable> partialDots, Context ctx)
-        throws IOException, InterruptedException {
+      throws IOException, InterruptedException {
       Iterator<VectorWritable> partialDotsIterator = partialDots.iterator();
       Vector dots = partialDotsIterator.next().get();
       while (partialDotsIterator.hasNext()) {
@@ -405,7 +404,7 @@ public class RowSimilarityJob extends Ab
 
     @Override
     protected void map(IntWritable row, VectorWritable similaritiesWritable, Context ctx)
-        throws IOException, InterruptedException {
+      throws IOException, InterruptedException {
       Vector similarities = similaritiesWritable.get();
       // For performance reasons moved transposedPartial creation out of the while loop and reusing the same vector
       Vector transposedPartial = similarities.like();

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/Vectors.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/Vectors.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/Vectors.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/Vectors.java Wed Jun 20 12:07:50 2012
@@ -37,9 +37,10 @@ import java.io.IOException;
 import java.util.Comparator;
 import java.util.Iterator;
 
-public class Vectors {
+public final class Vectors {
 
-  private Vectors() {}
+  private Vectors() {
+  }
 
   public static Vector maybeSample(Vector original, int sampleSize) {
     if (original.getNumNondefaultElements() <= sampleSize) {
@@ -165,7 +166,8 @@ public class Vectors {
   /* ugly optimization for loading sparse vectors containing ints only */
   public static OpenIntIntHashMap readAsIntMap(DataInput in) throws IOException {
     int flags = in.readByte();
-    Preconditions.checkArgument(flags >> VectorWritable.NUM_FLAGS == 0, "Unknown flags set: %d", Integer.toString(flags, 2));
+    Preconditions.checkArgument(flags >> VectorWritable.NUM_FLAGS == 0,
+                                "Unknown flags set: %d", Integer.toString(flags, 2));
     boolean dense = (flags & VectorWritable.FLAG_DENSE) != 0;
     boolean sequential = (flags & VectorWritable.FLAG_SEQUENTIAL) != 0;
     boolean laxPrecision = (flags & VectorWritable.FLAG_LAX_PRECISION) != 0;

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/measures/LoglikelihoodSimilarity.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/measures/LoglikelihoodSimilarity.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/measures/LoglikelihoodSimilarity.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/measures/LoglikelihoodSimilarity.java Wed Jun 20 12:07:50 2012
@@ -23,9 +23,11 @@ public class LoglikelihoodSimilarity ext
 
   @Override
   public double similarity(double summedAggregations, double normA, double normB, int numberOfColumns) {
-    double logLikelihood = LogLikelihood.logLikelihoodRatio((long) summedAggregations, (long) (normB - summedAggregations),
-        (long) (normA - summedAggregations), (long) (numberOfColumns - normA - normB + summedAggregations));
-
+    double logLikelihood =
+        LogLikelihood.logLikelihoodRatio((long) summedAggregations,
+                                         (long) (normB - summedAggregations),
+                                         (long) (normA - summedAggregations),
+                                         (long) (numberOfColumns - normA - normB + summedAggregations));
     return 1.0 - 1.0 / (1.0 + logLikelihood);
   }
 

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/solver/DistributedConjugateGradientSolver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/solver/DistributedConjugateGradientSolver.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/solver/DistributedConjugateGradientSolver.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/solver/DistributedConjugateGradientSolver.java Wed Jun 20 12:07:50 2012
@@ -55,9 +55,8 @@ public class DistributedConjugateGradien
    * @param b              Vector b
    * @param preconditioner Optional preconditioner for the system
    * @param maxIterations  Maximum number of iterations to run, defaults to numCols
-   * @param maxError       Maximum error tolerated in the result. If the norm of the residual falls below this, then the 
-   *                       algorithm stops and returns. 
-
+   * @param maxError       Maximum error tolerated in the result. If the norm of the residual falls below this,
+   *                       then the algorithm stops and returns.
    * @return               The vector that solves the system.
    */
   public Vector runJob(Path inputPath, 
@@ -92,7 +91,9 @@ public class DistributedConjugateGradien
     Path vectorPath = new Path(AbstractJob.getOption(parsedArgs, "--vector"));
     int numRows = Integer.parseInt(AbstractJob.getOption(parsedArgs, "--numRows"));
     int numCols = Integer.parseInt(AbstractJob.getOption(parsedArgs, "--numCols"));
-    int maxIterations = parsedArgs.containsKey("--maxIter") ? Integer.parseInt(AbstractJob.getOption(parsedArgs, "--maxIter")) : numCols;
+    int maxIterations = parsedArgs.containsKey("--maxIter")
+        ? Integer.parseInt(AbstractJob.getOption(parsedArgs, "--maxIter"))
+        : numCols;
     double maxError = parsedArgs.containsKey("--maxError") 
         ? Double.parseDouble(AbstractJob.getOption(parsedArgs, "--maxError"))
         : ConjugateGradientSolver.DEFAULT_MAX_ERROR;

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stats/VarianceTotals.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stats/VarianceTotals.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stats/VarianceTotals.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stats/VarianceTotals.java Wed Jun 20 12:07:50 2012
@@ -23,9 +23,9 @@ package org.apache.mahout.math.hadoop.st
  */
 public final class VarianceTotals {
 
-  private double sumOfSquares = 0.0;
-  private double sum = 0.0;
-  private double totalCount = 0.0;
+  private double sumOfSquares;
+  private double sum;
+  private double totalCount;
 
   public double getSumOfSquares() {
     return sumOfSquares;

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/ABtJob.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/ABtJob.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/ABtJob.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/ABtJob.java Wed Jun 20 12:07:50 2012
@@ -62,7 +62,7 @@ import org.apache.mahout.math.hadoop.sto
  * 
  */
 @SuppressWarnings("deprecation")
-public class ABtJob {
+public final class ABtJob {
 
   public static final String PROP_BT_PATH = "ssvd.Bt.path";
   public static final String PROP_BT_BROADCAST = "ssvd.Bt.broadcast";
@@ -220,16 +220,16 @@ public class ABtJob {
         // DEBUG: stdout
         //System.out.printf("list of files: " + btFiles);
 
-        String btLocalPath = "";
+        StringBuilder btLocalPath = new StringBuilder();
         for (Path btFile : btFiles) {
-          if (!btLocalPath.isEmpty()) {
-            btLocalPath += Path.SEPARATOR_CHAR;
+          if (btLocalPath.length() > 0) {
+            btLocalPath.append(Path.SEPARATOR_CHAR);
           }
-          btLocalPath += btFile;
+          btLocalPath.append(btFile);
         }
 
         btInput =
-          new SequenceFileDirIterator<IntWritable, VectorWritable>(new Path(btLocalPath),
+          new SequenceFileDirIterator<IntWritable, VectorWritable>(new Path(btLocalPath.toString()),
                                                                    PathType.LIST,
                                                                    null,
                                                                    null,
@@ -379,16 +379,12 @@ public class ABtJob {
     /**
      * key doesn't matter here, only value does. key always gets substituted by
      * SPW.
-     * 
-     * @param <K>
-     *          bogus
      */
-    private <K, V> OutputCollector<K, V>
-        createOutputCollector(String name,
-                              final SplitPartitionedWritable spw,
-                              Context ctx,
-                              Class<V> valueClass) throws IOException,
-          InterruptedException {
+    private <K,V> OutputCollector<K,V> createOutputCollector(String name,
+                                                             final SplitPartitionedWritable spw,
+                                                             Context ctx,
+                                                             Class<V> valueClass)
+      throws IOException, InterruptedException {
       Path outputPath = getSplitFilePath(name, spw, ctx);
       final SequenceFile.Writer w =
         SequenceFile.createWriter(FileSystem.get(outputPath.toUri(), ctx.getConfiguration()),
@@ -406,9 +402,7 @@ public class ABtJob {
     }
 
     @Override
-    protected void cleanup(Context context) throws IOException,
-      InterruptedException {
-
+    protected void cleanup(Context context) throws IOException, InterruptedException {
       IOUtils.close(closeables);
     }