You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by ss...@apache.org on 2013/06/12 22:44:21 UTC

svn commit: r1492416 [1/3] - in /mahout/trunk: ./ core/src/main/java/org/apache/mahout/cf/taste/hadoop/ core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/ core/src/main/java/org/apache/mahout/classifier/ core/src/main/java/org/apache/mahout/clas...

Author: ssc
Date: Wed Jun 12 20:44:19 2013
New Revision: 1492416

URL: http://svn.apache.org/r1492416
Log:
MAHOUT-1258 Another shot at findbugs and checkstyle

Modified:
    mahout/trunk/CHANGELOG
    mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/TasteHadoopUtils.java
    mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/ParallelALSFactorizationJob.java
    mahout/trunk/core/src/main/java/org/apache/mahout/classifier/ConfusionMatrix.java
    mahout/trunk/core/src/main/java/org/apache/mahout/classifier/df/data/Data.java
    mahout/trunk/core/src/main/java/org/apache/mahout/classifier/df/data/Dataset.java
    mahout/trunk/core/src/main/java/org/apache/mahout/classifier/df/mapreduce/Classifier.java
    mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sgd/L2.java
    mahout/trunk/core/src/main/java/org/apache/mahout/clustering/ClusteringUtils.java
    mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyDriver.java
    mahout/trunk/core/src/main/java/org/apache/mahout/clustering/classify/ClusterClassificationDriver.java
    mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansDriver.java
    mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansDriver.java
    mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/InMemoryCollapsedVariationalBayes0.java
    mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/TopicModel.java
    mahout/trunk/core/src/main/java/org/apache/mahout/clustering/minhash/MinHashDriver.java
    mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/eigencuts/EigencutsDriver.java
    mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/eigencuts/EigencutsSensitivityJob.java
    mahout/trunk/core/src/main/java/org/apache/mahout/clustering/streaming/cluster/BallKMeans.java
    mahout/trunk/core/src/main/java/org/apache/mahout/clustering/streaming/mapreduce/StreamingKMeansDriver.java
    mahout/trunk/core/src/main/java/org/apache/mahout/clustering/streaming/mapreduce/StreamingKMeansThread.java
    mahout/trunk/core/src/main/java/org/apache/mahout/clustering/streaming/mapreduce/StreamingKMeansUtilsMR.java
    mahout/trunk/core/src/main/java/org/apache/mahout/clustering/topdown/postprocessor/ClusterCountReader.java
    mahout/trunk/core/src/main/java/org/apache/mahout/clustering/topdown/postprocessor/ClusterOutputPostProcessor.java
    mahout/trunk/core/src/main/java/org/apache/mahout/clustering/topdown/postprocessor/ClusterOutputPostProcessorDriver.java
    mahout/trunk/core/src/main/java/org/apache/mahout/clustering/topdown/postprocessor/ClusterOutputPostProcessorMapper.java
    mahout/trunk/core/src/main/java/org/apache/mahout/clustering/topdown/postprocessor/ClusterOutputPostProcessorReducer.java
    mahout/trunk/core/src/main/java/org/apache/mahout/common/AbstractJob.java
    mahout/trunk/core/src/main/java/org/apache/mahout/common/TimingStatistics.java
    mahout/trunk/core/src/main/java/org/apache/mahout/common/distance/DistanceMeasure.java
    mahout/trunk/core/src/main/java/org/apache/mahout/common/iterator/FileLineIterator.java
    mahout/trunk/core/src/main/java/org/apache/mahout/common/iterator/sequencefile/SequenceFileIterator.java
    mahout/trunk/core/src/main/java/org/apache/mahout/common/iterator/sequencefile/SequenceFileValueIterator.java
    mahout/trunk/core/src/main/java/org/apache/mahout/ep/EvolutionaryProcess.java
    mahout/trunk/core/src/main/java/org/apache/mahout/ep/State.java
    mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/PFPGrowth.java
    mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/ParallelCountingMapper.java
    mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/DistributedRowMatrix.java
    mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/VectorDistanceSimilarityJob.java
    mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/ABtDenseOutJob.java
    mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/ABtJob.java
    mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/QJob.java
    mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/YtYJob.java
    mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/qr/GivensThinSolver.java
    mahout/trunk/core/src/main/java/org/apache/mahout/math/neighborhood/BruteSearch.java
    mahout/trunk/core/src/main/java/org/apache/mahout/math/neighborhood/HashedVector.java
    mahout/trunk/core/src/main/java/org/apache/mahout/math/neighborhood/ProjectionSearch.java
    mahout/trunk/core/src/main/java/org/apache/mahout/math/neighborhood/Searcher.java
    mahout/trunk/core/src/main/java/org/apache/mahout/math/random/RandomProjector.java
    mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/DictionaryVectorizer.java
    mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/SparseVectorsFromSequenceFiles.java
    mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/pruner/WordsPrunerReducer.java
    mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/term/TFPartialVectorReducer.java
    mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderJobTest.java
    mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/MathHelper.java
    mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/LocalSSVDPCADenseTest.java
    mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/LocalSSVDSolverSparseSequentialTest.java
    mahout/trunk/core/src/test/java/org/apache/mahout/vectorizer/EncodedVectorsFromSequenceFilesTest.java
    mahout/trunk/core/src/test/java/org/apache/mahout/vectorizer/HighDFWordsPrunerTest.java
    mahout/trunk/core/src/test/java/org/apache/mahout/vectorizer/SparseVectorsFromSequenceFilesTest.java
    mahout/trunk/core/src/test/java/org/apache/mahout/vectorizer/collocations/llr/CollocReducerTest.java
    mahout/trunk/core/src/test/java/org/apache/mahout/vectorizer/collocations/llr/LLRReducerTest.java
    mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/email/MailToPrefsDriver.java
    mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/DataFileIterator.java
    mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/PosTagger.java
    mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/SimpleCsvExamples.java
    mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayKMeans.java
    mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/streaming/tools/ClusterQualitySummarizer.java
    mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/streaming/tools/IOUtils.java
    mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/streaming/tools/ResplitSequenceFiles.java
    mahout/trunk/integration/src/main/java/org/apache/mahout/benchmark/VectorBenchmarks.java
    mahout/trunk/integration/src/main/java/org/apache/mahout/cf/taste/impl/model/hbase/HBaseDataModel.java
    mahout/trunk/integration/src/main/java/org/apache/mahout/cf/taste/impl/recommender/slopeone/jdbc/MySQLJDBCDiffStorage.java
    mahout/trunk/integration/src/main/java/org/apache/mahout/text/LuceneIndexFileNameFilter.java
    mahout/trunk/integration/src/main/java/org/apache/mahout/text/LuceneSegmentInputFormat.java
    mahout/trunk/integration/src/main/java/org/apache/mahout/text/LuceneSegmentInputSplit.java
    mahout/trunk/integration/src/main/java/org/apache/mahout/text/LuceneSegmentRecordReader.java
    mahout/trunk/integration/src/main/java/org/apache/mahout/text/LuceneSeqFileHelper.java
    mahout/trunk/integration/src/main/java/org/apache/mahout/text/LuceneStorageConfiguration.java
    mahout/trunk/integration/src/main/java/org/apache/mahout/text/ReadOnlyFileSystemDirectory.java
    mahout/trunk/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromLuceneStorage.java
    mahout/trunk/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromLuceneStorageDriver.java
    mahout/trunk/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromLuceneStorageMRJob.java
    mahout/trunk/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromLuceneStorageMapper.java
    mahout/trunk/integration/src/main/java/org/apache/mahout/utils/ConcatenateVectorsJob.java
    mahout/trunk/integration/src/main/java/org/apache/mahout/utils/ConcatenateVectorsReducer.java
    mahout/trunk/integration/src/main/java/org/apache/mahout/utils/SplitInput.java
    mahout/trunk/integration/src/main/java/org/apache/mahout/utils/clustering/AbstractClusterWriter.java
    mahout/trunk/integration/src/main/java/org/apache/mahout/utils/email/MailProcessor.java
    mahout/trunk/integration/src/main/java/org/apache/mahout/utils/regex/AnalyzerTransformer.java
    mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/arff/Driver.java
    mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/lucene/ClusterLabels.java
    mahout/trunk/integration/src/test/java/org/apache/mahout/text/AbstractLuceneStorageTest.java
    mahout/trunk/integration/src/test/java/org/apache/mahout/text/LuceneSegmentInputSplitTest.java
    mahout/trunk/integration/src/test/java/org/apache/mahout/text/LuceneStorageConfigurationTest.java
    mahout/trunk/integration/src/test/java/org/apache/mahout/text/SequenceFilesFromLuceneStorageDriverTest.java
    mahout/trunk/integration/src/test/java/org/apache/mahout/text/SequenceFilesFromLuceneStorageTest.java
    mahout/trunk/math/src/main/java/org/apache/mahout/math/AbstractMatrix.java
    mahout/trunk/math/src/main/java/org/apache/mahout/math/AbstractVector.java
    mahout/trunk/math/src/main/java/org/apache/mahout/math/CardinalityException.java
    mahout/trunk/math/src/main/java/org/apache/mahout/math/Centroid.java
    mahout/trunk/math/src/main/java/org/apache/mahout/math/FileBasedSparseBinaryMatrix.java
    mahout/trunk/math/src/main/java/org/apache/mahout/math/SequentialAccessSparseVector.java
    mahout/trunk/math/src/main/java/org/apache/mahout/math/Vector.java
    mahout/trunk/math/src/main/java/org/apache/mahout/math/VectorBinaryAggregate.java
    mahout/trunk/math/src/main/java/org/apache/mahout/math/VectorBinaryAssign.java
    mahout/trunk/math/src/main/java/org/apache/mahout/math/function/Functions.java
    mahout/trunk/math/src/main/java/org/apache/mahout/math/function/PlusMult.java
    mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/random/engine/MersenneTwister.java
    mahout/trunk/math/src/main/java/org/apache/mahout/math/list/package-info.java
    mahout/trunk/math/src/main/java/org/apache/mahout/math/map/package-info.java
    mahout/trunk/math/src/test/java/org/apache/mahout/math/AbstractVectorTest.java
    mahout/trunk/math/src/test/java/org/apache/mahout/math/VectorBinaryAggregateTest.java
    mahout/trunk/math/src/test/java/org/apache/mahout/math/VectorBinaryAssignTest.java
    mahout/trunk/math/src/test/java/org/apache/mahout/math/randomized/RandomBlasting.java

Modified: mahout/trunk/CHANGELOG
URL: http://svn.apache.org/viewvc/mahout/trunk/CHANGELOG?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/CHANGELOG (original)
+++ mahout/trunk/CHANGELOG Wed Jun 12 20:44:19 2013
@@ -2,9 +2,11 @@ Mahout Change Log
 
 Release 0.8 - unreleased
 
-__MAHOUT-1253: Add experiment tools for StreamingKMeans, part 1 (dfilimon)
+  MAHOUT-1258: Another shot at findbugs and checkstyle (ssc)
 
-  MAHOUT-884: Matrix Concatenate Utility (Lance Norskog, smarthi)
+  MAHOUT-1253: Add experiment tools for StreamingKMeans, part 1 (dfilimon)
+
+  MAHOUT-884:  Matrix Concatenate Utility (Lance Norskog via smarthi)
 
   MAHOUT-1250: Deprecate unused algorithms (ssc)
 

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/TasteHadoopUtils.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/TasteHadoopUtils.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/TasteHadoopUtils.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/TasteHadoopUtils.java Wed Jun 12 20:44:19 2013
@@ -58,8 +58,8 @@ public final class TasteHadoopUtils {
   }
 
   public static int readID(String token, boolean usesLongIDs) {
-    return usesLongIDs ?
-        TasteHadoopUtils.idToIndex(Long.parseLong(token))
+    return usesLongIDs
+        ? TasteHadoopUtils.idToIndex(Long.parseLong(token))
         : Integer.parseInt(token);
   }
 

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/ParallelALSFactorizationJob.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/ParallelALSFactorizationJob.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/ParallelALSFactorizationJob.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/ParallelALSFactorizationJob.java Wed Jun 12 20:44:19 2013
@@ -250,7 +250,7 @@ public class ParallelALSFactorizationJob
 
     @Override
     protected void reduce(WritableComparable<?> key, Iterable<VectorWritable> values, Context ctx)
-        throws IOException, InterruptedException {
+      throws IOException, InterruptedException {
       result.set(ALS.sum(values.iterator()));
       ctx.write(key, result);
     }
@@ -263,7 +263,7 @@ public class ParallelALSFactorizationJob
 
     @Override
     protected void reduce(WritableComparable<?> key, Iterable<VectorWritable> values, Context ctx)
-        throws IOException, InterruptedException {
+      throws IOException, InterruptedException {
       Vector sum = ALS.sum(values.iterator());
       result.set(new SequentialAccessSparseVector(sum));
       ctx.write(key, result);
@@ -277,7 +277,7 @@ public class ParallelALSFactorizationJob
 
     @Override
     public void reduce(WritableComparable<?> key, Iterable<VectorWritable> vectors, Context ctx)
-        throws IOException, InterruptedException {
+      throws IOException, InterruptedException {
       Vector merged = VectorWritable.merge(vectors.iterator()).get();
       result.set(new SequentialAccessSparseVector(merged));
       ctx.write(key, result);
@@ -330,7 +330,7 @@ public class ParallelALSFactorizationJob
     if (implicitFeedback) {
       solverMapperClassInternal = SolveImplicitFeedbackMapper.class;
       name = "Recompute " + matrixName + ", iteration (" + (iterationNumber + 1) + '/' + numIterations + "), "
-          + '(' + numThreadsPerSolver + " threads, " + numFeatures +" features, implicit feedback)";
+          + '(' + numThreadsPerSolver + " threads, " + numFeatures + " features, implicit feedback)";
     } else {
       solverMapperClassInternal = SolveExplicitFeedbackMapper.class;
       name = "Recompute " + matrixName + ", iteration (" + (iterationNumber + 1) + '/' + numIterations + "), "
@@ -412,7 +412,7 @@ public class ParallelALSFactorizationJob
   static class IDMapReducer extends Reducer<VarIntWritable,VarLongWritable,VarIntWritable,VarLongWritable> {
     @Override
     protected void reduce(VarIntWritable index, Iterable<VarLongWritable> ids, Context ctx)
-        throws IOException, InterruptedException {
+      throws IOException, InterruptedException {
       ctx.write(index, ids.iterator().next());
     }
   }

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/ConfusionMatrix.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/ConfusionMatrix.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/ConfusionMatrix.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/ConfusionMatrix.java Wed Jun 12 20:44:19 2013
@@ -99,7 +99,7 @@ public class ConfusionMatrix {
     int count = 0;
     double accuracy = 0;
     for (String label: labelMap.keySet()) {
-      if (! label.equals(defaultLabel)) {
+      if (!label.equals(defaultLabel)) {
         accuracy += getAccuracy(label);
       }
       count++;
@@ -121,14 +121,14 @@ public class ConfusionMatrix {
   public double getKappa() {
     double a = 0.0;
     double b = 0.0;
-    for(int i = 0; i < confusionMatrix.length; i++) {
+    for (int i = 0; i < confusionMatrix.length; i++) {
       a += confusionMatrix[i][i];
       double br = 0;
-      for(int j = 0; j < confusionMatrix.length; j++) {
+      for (int j = 0; j < confusionMatrix.length; j++) {
         br += confusionMatrix[i][j];
       }
       double bc = 0;
-      for(int j = 0; j < confusionMatrix.length; j++) {
+      for (int j = 0; j < confusionMatrix.length; j++) {
         bc += confusionMatrix[j][i];
       }
       b += br * bc;
@@ -143,9 +143,9 @@ public class ConfusionMatrix {
    */
   public RunningAverageAndStdDev getNormalizedStats() {
     RunningAverageAndStdDev summer = new FullRunningAverageAndStdDev();
-    for(int d = 0; d < confusionMatrix.length; d++) {
+    for (int d = 0; d < confusionMatrix.length; d++) {
       double total = 0;
-      for(int j = 0; j < confusionMatrix.length; j++) {
+      for (int j = 0; j < confusionMatrix.length; j++) {
         total += confusionMatrix[d][j];
       }
       summer.addDatum(confusionMatrix[d][d] / (total + 0.000001));

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/df/data/Data.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/df/data/Data.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/df/data/Data.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/df/data/Data.java Wed Jun 12 20:44:19 2013
@@ -26,7 +26,7 @@ import java.util.List;
 import java.util.Random;
 
 /**
- * Holds a list of vectors and their corresponding Dataset. contains various operations that deals with the
+ * Holds a list of vectors and their corresponding Dataset. contains various OPERATIONS that deals with the
  * vectors (subset, count,...)
  * 
  */

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/df/data/Dataset.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/df/data/Dataset.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/df/data/Dataset.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/df/data/Dataset.java Wed Jun 12 20:44:19 2013
@@ -69,7 +69,7 @@ public class Dataset {
     private static Attribute fromString(String from) {
       
       Attribute toReturn = LABEL;
-      if(NUMERICAL.toString().equalsIgnoreCase(from)) {
+      if (NUMERICAL.toString().equalsIgnoreCase(from)) {
         toReturn = NUMERICAL;
       } else if (CATEGORICAL.toString().equalsIgnoreCase(from)) {
         toReturn = CATEGORICAL;
@@ -103,15 +103,14 @@ public class Dataset {
   private int nbInstances;
   
   /** JSON serial/de-serial-izer */
-  private static final ObjectMapper objectMapper = new ObjectMapper();
+  private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
 
   // Some literals for JSON representation
-  final static String TYPE = "type";
-  final static String VALUES = "values";
-  final static String LABEL = "label";
+  static final String TYPE = "type";
+  static final String VALUES = "values";
+  static final String LABEL = "label";
 
-  protected Dataset() {
-  }
+  protected Dataset() {}
 
   /**
    * Should only be called by a DataLoader
@@ -186,7 +185,7 @@ public class Dataset {
   }
   
   public Attribute getAttribute(int attr) {
-	  return attributes[attr];
+    return attributes[attr];
   }
 
   /**
@@ -215,7 +214,7 @@ public class Dataset {
   }
   
   public String toString() {
-	  return "attributes="+Arrays.toString(attributes);
+    return "attributes=" + Arrays.toString(attributes);
   }
 
   /**
@@ -323,7 +322,7 @@ public class Dataset {
 
     FileSystem fs = path.getFileSystem(conf);
     long bytesToRead = fs.getFileStatus(path).getLen();
-    byte[] buff = new byte[new Long(bytesToRead).intValue()];
+    byte[] buff = new byte[Long.valueOf(bytesToRead).intValue()];
     FSDataInputStream input = fs.open(path);
     try {
       input.readFully(buff);
@@ -361,7 +360,7 @@ public class Dataset {
       toWrite.add(attribute);
     }
     try {
-      return objectMapper.writeValueAsString(toWrite);
+      return OBJECT_MAPPER.writeValueAsString(toWrite);
     } catch (Exception ex) {
       throw new RuntimeException(ex);
     }
@@ -377,7 +376,7 @@ public class Dataset {
     Dataset dataset = new Dataset();
     List<Map<String, Object>> fromJSON;
     try {
-       fromJSON = objectMapper.readValue(json, new TypeReference<List<Map<String, Object>>>() {});
+      fromJSON = OBJECT_MAPPER.readValue(json, new TypeReference<List<Map<String, Object>>>() {});
     } catch (Exception ex) {
       throw new RuntimeException(ex);
     }
@@ -386,15 +385,15 @@ public class Dataset {
     String[][] nominalValues = new String[fromJSON.size()][];
     for (int i = 0; i < fromJSON.size(); i++) {
       Map<String, Object> attribute = fromJSON.get(i);
-      if(Attribute.fromString((String) attribute.get(TYPE)) == Attribute.IGNORED) {
+      if (Attribute.fromString((String) attribute.get(TYPE)) == Attribute.IGNORED) {
         ignored.add(i);
       } else {
         Attribute asAttribute = Attribute.fromString((String) attribute.get(TYPE));
         attributes.add(asAttribute);
-        if((Boolean) attribute.get(LABEL)) {
+        if ((Boolean) attribute.get(LABEL)) {
           dataset.labelId = i - ignored.size();
         }
-        if(attribute.get(VALUES) != null) {
+        if (attribute.get(VALUES) != null) {
           List get = (List) attribute.get(VALUES);
           String[] array = (String[]) get.toArray(new String[]{});
           nominalValues[i] = array;
@@ -404,7 +403,7 @@ public class Dataset {
     dataset.attributes = attributes.toArray(new Attribute[]{});
     dataset.ignored = new int[ignored.size()];
     dataset.values = nominalValues;
-    for(int i = 0; i < dataset.ignored.length; i++) {
+    for (int i = 0; i < dataset.ignored.length; i++) {
       dataset.ignored[i] = ignored.get(i);
     }
     return dataset;

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/df/mapreduce/Classifier.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/df/mapreduce/Classifier.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/df/mapreduce/Classifier.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/df/mapreduce/Classifier.java Wed Jun 12 20:44:19 2013
@@ -27,7 +27,6 @@ import org.apache.hadoop.conf.Configurat
 import org.apache.hadoop.filecache.DistributedCache;
 import org.apache.hadoop.fs.FSDataOutputStream;
 import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.LocalFileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.io.DoubleWritable;
 import org.apache.hadoop.io.LongWritable;
@@ -204,17 +203,9 @@ public class Classifier {
       if (files.length < 2) {
         throw new IOException("not enough paths in the DistributedCache");
       }
-      LocalFileSystem localFs = FileSystem.getLocal(conf);
-      if (!localFs.exists(files[0])) {//MAHOUT-992: this seems safe
-        files[0] = localFs.makeQualified(new Path(DistributedCache.getCacheFiles(conf)[0].getPath()));
-      }
-
       dataset = Dataset.load(conf, files[0]);
-
       converter = new DataConverter(dataset);
-      if (!localFs.exists(files[1])) {//MAHOUT-992: this seems safe
-        files[1] = localFs.makeQualified(new Path(DistributedCache.getCacheFiles(conf)[1].getPath()));
-      }
+
       forest = DecisionForest.load(conf, files[1]);
       if (forest == null) {
         throw new InterruptedException("DecisionForest not found!");

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sgd/L2.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sgd/L2.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sgd/L2.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sgd/L2.java Wed Jun 12 20:44:19 2013
@@ -33,13 +33,13 @@ public class L2 implements PriorFunction
   private double s;
 
   public L2(double scale) {
-    this.s = scale;
-    this.s2 = scale * scale;
+    s = scale;
+    s2 = scale * scale;
   }
 
   public L2() {
-    this.s = 1.0;
-    this.s2 = 1.0;	  
+    s = 1.0;
+    s2 = 1.0;
   }
 
   @Override

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/ClusteringUtils.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/ClusteringUtils.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/ClusteringUtils.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/ClusteringUtils.java Wed Jun 12 20:44:19 2013
@@ -133,8 +133,8 @@ public final class ClusteringUtils {
       double maxDBIndex = 0;
       for (int j = 0; j < n; ++j) {
         if (i != j) {
-          double dbIndex = (averageDistanceI + clusterDistanceSummaries.get(j).getMean()) /
-              distanceMeasure.distance(centroids.get(i), centroids.get(j));
+          double dbIndex = (averageDistanceI + clusterDistanceSummaries.get(j).getMean())
+              / distanceMeasure.distance(centroids.get(i), centroids.get(j));
           if (dbIndex > maxDBIndex) {
             maxDBIndex = dbIndex;
           }

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyDriver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyDriver.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyDriver.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyDriver.java Wed Jun 12 20:44:19 2013
@@ -371,10 +371,7 @@ public class CanopyDriver extends Abstra
                                   boolean runSequential)
     throws IOException, InterruptedException, ClassNotFoundException {
     ClusterClassifier.writePolicy(new CanopyClusteringPolicy(), canopies);
-    ClusterClassificationDriver.run(conf,
-    		                        points,
-                                    output,
-                                    new Path(output, PathDirectory.CLUSTERED_POINTS_DIRECTORY),
+    ClusterClassificationDriver.run(conf, points, output, new Path(output, PathDirectory.CLUSTERED_POINTS_DIRECTORY),
                                     clusterClassificationThreshold, true, runSequential);
   }
 

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/classify/ClusterClassificationDriver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/classify/ClusterClassificationDriver.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/classify/ClusterClassificationDriver.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/classify/ClusterClassificationDriver.java Wed Jun 12 20:44:19 2013
@@ -124,12 +124,13 @@ public final class ClusterClassification
    */
   public static void run(Path input, Path clusteringOutputPath, Path output, Double clusterClassificationThreshold,
       boolean emitMostLikely, boolean runSequential) throws IOException, InterruptedException, ClassNotFoundException {
-	  Configuration conf = new Configuration();
-	  run(conf, input, clusteringOutputPath, output, clusterClassificationThreshold, emitMostLikely, runSequential);
+    Configuration conf = new Configuration();
+    run(conf, input, clusteringOutputPath, output, clusterClassificationThreshold, emitMostLikely, runSequential);
   }
 
-  public static void run(Configuration conf, Path input, Path clusteringOutputPath, Path output, Double clusterClassificationThreshold,
-      boolean emitMostLikely, boolean runSequential) throws IOException, InterruptedException, ClassNotFoundException {
+  public static void run(Configuration conf, Path input, Path clusteringOutputPath, Path output,
+                         Double clusterClassificationThreshold, boolean emitMostLikely, boolean runSequential)
+    throws IOException, InterruptedException, ClassNotFoundException {
     if (runSequential) {
       classifyClusterSeq(conf, input, clusteringOutputPath, output, clusterClassificationThreshold, emitMostLikely);
     } else {

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansDriver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansDriver.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansDriver.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansDriver.java Wed Jun 12 20:44:19 2013
@@ -157,7 +157,7 @@ public class FuzzyKMeansDriver extends A
                          boolean emitMostLikely,
                          double threshold,
                          boolean runSequential) throws IOException, ClassNotFoundException, InterruptedException {
-	Configuration conf = new Configuration();
+    Configuration conf = new Configuration();
     Path clustersOut = buildClusters(conf,
                                      input,
                                      clustersIn,

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansDriver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansDriver.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansDriver.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansDriver.java Wed Jun 12 20:44:19 2013
@@ -96,9 +96,6 @@ public class KMeansDriver extends Abstra
     boolean runClustering = hasOption(DefaultOptionCreator.CLUSTERING_OPTION);
     boolean runSequential = getOption(DefaultOptionCreator.METHOD_OPTION).equalsIgnoreCase(
         DefaultOptionCreator.SEQUENTIAL_METHOD);
-    if (getConf() == null) {
-      setConf(new Configuration());
-    }
     double clusterClassificationThreshold = 0.0;
     if (hasOption(DefaultOptionCreator.OUTLIER_THRESHOLD)) {
       clusterClassificationThreshold = Double.parseDouble(getOption(DefaultOptionCreator.OUTLIER_THRESHOLD));

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/InMemoryCollapsedVariationalBayes0.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/InMemoryCollapsedVariationalBayes0.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/InMemoryCollapsedVariationalBayes0.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/InMemoryCollapsedVariationalBayes0.java Wed Jun 12 20:44:19 2013
@@ -91,14 +91,6 @@ public class InMemoryCollapsedVariationa
   public void setVerbose(boolean verbose) {
     this.verbose = verbose;
   }
-
-  public InMemoryCollapsedVariationalBayes0(Matrix corpus,
-                                            String[] terms,
-                                            int numTopics,
-                                            double alpha,
-                                            double eta) {
-    this(corpus, terms, numTopics, alpha, eta, 1, 1, 0);
-  }
     
   public InMemoryCollapsedVariationalBayes0(Matrix corpus,
                                             String[] terms,
@@ -468,9 +460,6 @@ public class InMemoryCollapsedVariationa
 
   @Override
   public Configuration getConf() {
-    if (super.getConf() == null) {
-      setConf(new Configuration());
-    }
     return super.getConf();
   }
 

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/TopicModel.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/TopicModel.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/TopicModel.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/TopicModel.java Wed Jun 12 20:44:19 2013
@@ -247,7 +247,7 @@ public class TopicModel implements Confi
       topicTermCounts.assignRow(x, new SequentialAccessSparseVector(numTerms));
     }
     topicSums.assign(1.0);
-    if(threadPool.isTerminated()) {
+    if (threadPool.isTerminated()) {
       initializeThreadPool();
     }
   }
@@ -262,7 +262,7 @@ public class TopicModel implements Confi
         log.warn("Threadpool timed out on await termination - jobs still running!");
       }
     } catch (InterruptedException e) {
-        log.error("Interrupted shutting down!", e);
+      log.error("Interrupted shutting down!", e);
     }
   }
 

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/minhash/MinHashDriver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/minhash/MinHashDriver.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/minhash/MinHashDriver.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/minhash/MinHashDriver.java Wed Jun 12 20:44:19 2013
@@ -57,8 +57,8 @@ public final class MinHashDriver extends
 
     addOption(MIN_CLUSTER_SIZE, "mcs", "Minimum points inside a cluster", String.valueOf(10));
     addOption(MIN_VECTOR_SIZE, "mvs", "Minimum size of vector to be hashed", String.valueOf(5));
-    addOption(VECTOR_DIMENSION_TO_HASH, "vdh", "Dimension of vector to hash. Available types: (value, index). " +
-        "Defaults to 'value'", HASH_DIMENSION_VALUE);
+    addOption(VECTOR_DIMENSION_TO_HASH, "vdh", "Dimension of vector to hash. Available types: (value, index). "
+        + "Defaults to 'value'", HASH_DIMENSION_VALUE);
     addOption(HASH_TYPE, "ht", "Type of hash function to use. Available types: (linear, polynomial, murmur) ",
         HashFactory.HashType.MURMUR.toString());
     addOption(NUM_HASH_FUNCTIONS, "nh", "Number of hash functions to be used", String.valueOf(10));
@@ -110,7 +110,7 @@ public final class MinHashDriver extends
 
     boolean succeeded = minHash.waitForCompletion(true);
     if (!succeeded) {
-     return -1;
+      return -1;
     }
 
     return 0;

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/eigencuts/EigencutsDriver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/eigencuts/EigencutsDriver.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/eigencuts/EigencutsDriver.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/eigencuts/EigencutsDriver.java Wed Jun 12 20:44:19 2013
@@ -212,7 +212,7 @@ public class EigencutsDriver extends Abs
 
   /**
    * Iteratively loops through the list, converting it to a Vector of double
-   * primitives worthy of other Mahout operations
+   * primitives worthy of other Mahout operations
    */
   private static Vector listToVector(Collection<Double> list) {
     Vector retval = new DenseVector(list.size());

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/eigencuts/EigencutsSensitivityJob.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/eigencuts/EigencutsSensitivityJob.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/eigencuts/EigencutsSensitivityJob.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/eigencuts/EigencutsSensitivityJob.java Wed Jun 12 20:44:19 2013
@@ -32,7 +32,7 @@ import org.apache.mahout.math.Vector;
 import org.apache.mahout.math.VectorWritable;
 
 /**
- * <p>There are a quite a few operations bundled within this mapper. Gather 'round
+ * <p>There are quite a few operations bundled within this mapper. Gather 'round
  * and listen, all of ye.</p>
  * 
  * <p>The input to this job is eight items:</p>

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/streaming/cluster/BallKMeans.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/streaming/cluster/BallKMeans.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/streaming/cluster/BallKMeans.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/streaming/cluster/BallKMeans.java Wed Jun 12 20:44:19 2013
@@ -281,11 +281,11 @@ public class BallKMeans implements Itera
    * @param datapoints The datapoints to select from.  These datapoints should be WeightedVectors of some kind.
    */
   private void initializeSeedsKMeansPlusPlus(List<? extends WeightedVector> datapoints) {
-    Preconditions.checkArgument(datapoints.size() > 1, "Must have at least two datapoints points to cluster " +
-        "sensibly");
+    Preconditions.checkArgument(datapoints.size() > 1, "Must have at least two datapoints points to cluster "
+        + "sensibly");
     Preconditions.checkArgument(datapoints.size() >= numClusters,
         String.format("Must have more datapoints [%d] than clusters [%d]", datapoints.size(), numClusters));
-    // Compute the centroid of all of the datapoints.  This is then used to compute the squared radius of the datapoints.
+    // Compute the centroid of all of the datapoints. This is then used to compute the squared radius of the datapoints.
     Centroid center = new Centroid(datapoints.iterator().next());
     for (WeightedVector row : Iterables.skip(datapoints, 1)) {
       center.update(row);
@@ -446,8 +446,8 @@ public class BallKMeans implements Itera
     return Iterators.transform(centroids.iterator(), new Function<Vector, Centroid>() {
       @Override
       public Centroid apply(Vector input) {
-        Preconditions.checkArgument(input instanceof Centroid, "Non-centroid in centroids " +
-            "searcher");
+        Preconditions.checkArgument(input instanceof Centroid, "Non-centroid in centroids "
+            + "searcher");
         //noinspection ConstantConditions
         return (Centroid)input;
       }

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/streaming/mapreduce/StreamingKMeansDriver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/streaming/mapreduce/StreamingKMeansDriver.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/streaming/mapreduce/StreamingKMeansDriver.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/streaming/mapreduce/StreamingKMeansDriver.java Wed Jun 12 20:44:19 2013
@@ -164,40 +164,40 @@ public final class StreamingKMeansDriver
     // There will be k final clusters, but in the Map phase to get a good approximation of the data, O(k log n)
     // clusters are needed. Since n is the number of data points and not knowable until reading all the vectors,
     // provide a decent estimate.
-    addOption(ESTIMATED_NUM_MAP_CLUSTERS, "km", "The estimated number of clusters to use for the " +
-        "Map phase of the job when running StreamingKMeans. This should be around k * log(n), " +
-        "where k is the final number of clusters and n is the total number of data points to " +
-        "cluster.");
+    addOption(ESTIMATED_NUM_MAP_CLUSTERS, "km", "The estimated number of clusters to use for the "
+        + "Map phase of the job when running StreamingKMeans. This should be around k * log(n), "
+        + "where k is the final number of clusters and n is the total number of data points to "
+        + "cluster.");
 
-    addOption(ESTIMATED_DISTANCE_CUTOFF, "e", "The initial estimated distance cutoff between two " +
-        "points for forming new clusters. If no value is given, it's estimated from the data set",
+    addOption(ESTIMATED_DISTANCE_CUTOFF, "e", "The initial estimated distance cutoff between two "
+        + "points for forming new clusters. If no value is given, it's estimated from the data set",
         String.valueOf(INVALID_DISTANCE_CUTOFF));
 
     // BallKMeans (reducer) options
-    addOption(MAX_NUM_ITERATIONS, "mi", "The maximum number of iterations to run for the " +
-        "BallKMeans algorithm used by the reducer. If no value is given, defaults to 10.", String.valueOf(10));
+    addOption(MAX_NUM_ITERATIONS, "mi", "The maximum number of iterations to run for the "
+        + "BallKMeans algorithm used by the reducer. If no value is given, defaults to 10.", String.valueOf(10));
 
-    addOption(TRIM_FRACTION, "tf", "The 'ball' aspect of ball k-means means that only the closest points " +
-        "to the centroid will actually be used for updating. The fraction of the points to be used is those " +
-        "points whose distance to the center is within trimFraction * distance to the closest other center. " +
-        "If no value is given, defaults to 0.9.", String.valueOf(0.9));
-
-    addFlag(RANDOM_INIT, "ri", "Whether to use k-means++ initialization or random initialization " +
-        "of the seed centroids. Essentially, k-means++ provides better clusters, but takes longer, whereas random " +
-        "initialization takes less time, but produces worse clusters, and tends to fail more often and needs " +
-        "multiple runs to compare to k-means++. If set, uses the random initialization.");
-
-    addFlag(IGNORE_WEIGHTS, "iw", "Whether to correct the weights of the centroids after the clustering is done. " +
-        "The weights end up being wrong because of the trimFraction and possible train/test splits. In some cases, " +
-        "especially in a pipeline, having an accurate count of the weights is useful. If set, ignores the final " +
-        "weights");
-
-    addOption(TEST_PROBABILITY, "testp", "A double value between 0 and 1 that represents the percentage of " +
-        "points to be used for 'testing' different clustering runs in the final BallKMeans " +
-        "step. If no value is given, defaults to 0.1", String.valueOf(0.1));
+    addOption(TRIM_FRACTION, "tf", "The 'ball' aspect of ball k-means means that only the closest points "
+        + "to the centroid will actually be used for updating. The fraction of the points to be used is those "
+        + "points whose distance to the center is within trimFraction * distance to the closest other center. "
+        + "If no value is given, defaults to 0.9.", String.valueOf(0.9));
+
+    addFlag(RANDOM_INIT, "ri", "Whether to use k-means++ initialization or random initialization "
+        + "of the seed centroids. Essentially, k-means++ provides better clusters, but takes longer, whereas random "
+        + "initialization takes less time, but produces worse clusters, and tends to fail more often and needs "
+        + "multiple runs to compare to k-means++. If set, uses the random initialization.");
+
+    addFlag(IGNORE_WEIGHTS, "iw", "Whether to correct the weights of the centroids after the clustering is done. "
+        + "The weights end up being wrong because of the trimFraction and possible train/test splits. In some cases, "
+        + "especially in a pipeline, having an accurate count of the weights is useful. If set, ignores the final "
+        + "weights");
+
+    addOption(TEST_PROBABILITY, "testp", "A double value between 0 and 1 that represents the percentage of "
+        + "points to be used for 'testing' different clustering runs in the final BallKMeans "
+        + "step. If no value is given, defaults to 0.1", String.valueOf(0.1));
 
-    addOption(NUM_BALLKMEANS_RUNS, "nbkm", "Number of BallKMeans runs to use at the end to try to cluster the " +
-        "points. If no value is given, defaults to 4", String.valueOf(4));
+    addOption(NUM_BALLKMEANS_RUNS, "nbkm", "Number of BallKMeans runs to use at the end to try to cluster the "
+        + "points. If no value is given, defaults to 4", String.valueOf(4));
 
     // Nearest neighbor search options
     // The distance measure used for computing the distance between two points. Generally, the
@@ -208,22 +208,22 @@ public final class StreamingKMeansDriver
 
     // The default searcher should be something more efficient that BruteSearch (ProjectionSearch, ...). See
     // o.a.m.math.neighborhood.*
-    addOption(SEARCHER_CLASS_OPTION, "sc", "The type of searcher to be used when performing nearest " +
-        "neighbor searches. Defaults to ProjectionSearch.", ProjectionSearch.class.getCanonicalName());
+    addOption(SEARCHER_CLASS_OPTION, "sc", "The type of searcher to be used when performing nearest "
+        + "neighbor searches. Defaults to ProjectionSearch.", ProjectionSearch.class.getCanonicalName());
 
     // In the original paper, the authors used 1 projection vector.
-    addOption(NUM_PROJECTIONS_OPTION, "np", "The number of projections considered in estimating the " +
-        "distances between vectors. Only used when the distance measure requested is either " +
-        "ProjectionSearch or FastProjectionSearch. If no value is given, defaults to 3.", String.valueOf(3));
-
-    addOption(SEARCH_SIZE_OPTION, "s", "In more efficient searches (non BruteSearch), " +
-        "not all distances are calculated for determining the nearest neighbors. The number of " +
-        "elements whose distances from the query vector is actually computer is proportional to " +
-        "searchSize. If no value is given, defaults to 1.", String.valueOf(2));
-
-    addFlag(REDUCE_STREAMING_KMEANS, "rskm", "There might be too many intermediate clusters from the mapper " +
-        "to fit into memory, so the reducer can run another pass of StreamingKMeans to collapse them down to a " +
-        "fewer clusters");
+    addOption(NUM_PROJECTIONS_OPTION, "np", "The number of projections considered in estimating the "
+        + "distances between vectors. Only used when the distance measure requested is either "
+        + "ProjectionSearch or FastProjectionSearch. If no value is given, defaults to 3.", String.valueOf(3));
+
+    addOption(SEARCH_SIZE_OPTION, "s", "In more efficient searches (non BruteSearch), "
+        + "not all distances are calculated for determining the nearest neighbors. The number of "
+        + "elements whose distances from the query vector is actually computer is proportional to "
+        + "searchSize. If no value is given, defaults to 1.", String.valueOf(2));
+
+    addFlag(REDUCE_STREAMING_KMEANS, "rskm", "There might be too many intermediate clusters from the mapper "
+        + "to fit into memory, so the reducer can run another pass of StreamingKMeans to collapse them down to a "
+        + "fewer clusters");
 
     addOption(DefaultOptionCreator.methodOption().create());
 
@@ -338,16 +338,16 @@ public final class StreamingKMeansDriver
     Preconditions.checkArgument(numClusters > 0, "Invalid number of clusters requested");
 
     // StreamingKMeans
-    Preconditions.checkArgument(estimatedNumMapClusters > numClusters, "Invalid number of estimated map " +
-        "clusters; There must be more than the final number of clusters (k log n vs k)");
+    Preconditions.checkArgument(estimatedNumMapClusters > numClusters, "Invalid number of estimated map "
+        + "clusters; There must be more than the final number of clusters (k log n vs k)");
     Preconditions.checkArgument(estimatedDistanceCutoff == INVALID_DISTANCE_CUTOFF || estimatedDistanceCutoff > 0,
         "estimatedDistanceCutoff cannot be negative");
 
     // BallKMeans
     Preconditions.checkArgument(maxNumIterations > 0, "Must have at least one BallKMeans iteration");
     Preconditions.checkArgument(trimFraction > 0, "trimFraction must be positive");
-    Preconditions.checkArgument(testProbability >= 0 && testProbability < 1, "test probability is not in the " +
-        "interval [0, 1)");
+    Preconditions.checkArgument(testProbability >= 0 && testProbability < 1, "test probability is not in the "
+        + "interval [0, 1)");
     Preconditions.checkArgument(numBallKMeansRuns > 0, "numBallKMeans cannot be negative");
 
     // Searcher
@@ -386,12 +386,12 @@ public final class StreamingKMeansDriver
 
     conf.setBoolean(REDUCE_STREAMING_KMEANS, reduceStreamingKMeans);
 
-    log.info("Parameters are: [k] numClusters {}; " +
-        "[SKM] estimatedNumMapClusters {}; estimatedDistanceCutoff {} " +
-        "[BKM] maxNumIterations {}; trimFraction {}; randomInit {}; ignoreWeights {}; " +
-        "testProbability {}; numBallKMeansRuns {}; " +
-        "[S] measureClass {}; searcherClass {}; searcherSize {}; numProjections {}; " +
-        "method {}; reduceStreamingKMeans {}", numClusters, estimatedNumMapClusters, estimatedDistanceCutoff,
+    log.info("Parameters are: [k] numClusters {}; "
+        + "[SKM] estimatedNumMapClusters {}; estimatedDistanceCutoff {} "
+        + "[BKM] maxNumIterations {}; trimFraction {}; randomInit {}; ignoreWeights {}; "
+        + "testProbability {}; numBallKMeansRuns {}; "
+        + "[S] measureClass {}; searcherClass {}; searcherSize {}; numProjections {}; "
+        + "method {}; reduceStreamingKMeans {}", numClusters, estimatedNumMapClusters, estimatedDistanceCutoff,
         maxNumIterations, trimFraction, randomInit, ignoreWeights, testProbability, numBallKMeansRuns,
         measureClass, searcherClass, searchSize, numProjections, method, reduceStreamingKMeans);
   }
@@ -418,7 +418,7 @@ public final class StreamingKMeansDriver
   }
 
   private static int runSequentially(Configuration conf, Path input, Path output)
-      throws IOException, ExecutionException, InterruptedException {
+    throws IOException, ExecutionException, InterruptedException {
     long start = System.currentTimeMillis();
     // Run StreamingKMeans step in parallel by spawning 1 thread per input path to process.
     ExecutorService pool = Executors.newCachedThreadPool();
@@ -452,7 +452,8 @@ public final class StreamingKMeansDriver
   }
 
   @SuppressWarnings("unchecked")
-  public static int runMapReduce(Configuration conf, Path input, Path output) throws IOException, ClassNotFoundException, InterruptedException {
+  public static int runMapReduce(Configuration conf, Path input, Path output)
+    throws IOException, ClassNotFoundException, InterruptedException {
     // Prepare Job for submission.
     Job job = HadoopUtil.prepareJob(input, output, SequenceFileInputFormat.class,
         StreamingKMeansMapper.class, IntWritable.class, CentroidWritable.class,

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/streaming/mapreduce/StreamingKMeansThread.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/streaming/mapreduce/StreamingKMeansThread.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/streaming/mapreduce/StreamingKMeansThread.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/streaming/mapreduce/StreamingKMeansThread.java Wed Jun 12 20:44:19 2013
@@ -16,12 +16,8 @@ public class StreamingKMeansThread imple
   private Iterable<Centroid> datapoints;
 
   public StreamingKMeansThread(Path input, Configuration conf) {
-    this.datapoints = StreamingKMeansUtilsMR.getCentroidsFromVectorWritable(new SequenceFileValueIterable<VectorWritable>(input, false, conf));
-    this.conf = conf;
-  }
-
-  public StreamingKMeansThread(Iterable<Centroid> datapoints, Configuration conf) {
-    this.datapoints = datapoints;
+    this.datapoints = StreamingKMeansUtilsMR.getCentroidsFromVectorWritable(
+        new SequenceFileValueIterable<VectorWritable>(input, false, conf));
     this.conf = conf;
   }
 

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/streaming/mapreduce/StreamingKMeansUtilsMR.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/streaming/mapreduce/StreamingKMeansUtilsMR.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/streaming/mapreduce/StreamingKMeansUtilsMR.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/streaming/mapreduce/StreamingKMeansUtilsMR.java Wed Jun 12 20:44:19 2013
@@ -53,8 +53,8 @@ public final class StreamingKMeansUtilsM
     if (searcherClass.equals(BruteSearch.class.getName())) {
       return ClassUtils.instantiateAs(searcherClass, UpdatableSearcher.class,
           new Class[]{DistanceMeasure.class}, new Object[]{distanceMeasure});
-    } else if (searcherClass.equals(FastProjectionSearch.class.getName()) ||
-        searcherClass.equals(ProjectionSearch.class.getName())) {
+    } else if (searcherClass.equals(FastProjectionSearch.class.getName())
+        || searcherClass.equals(ProjectionSearch.class.getName())) {
       return ClassUtils.instantiateAs(searcherClass, UpdatableSearcher.class,
           new Class[]{DistanceMeasure.class, int.class, int.class},
           new Object[]{distanceMeasure, numProjections, searchSize});
@@ -116,7 +116,7 @@ public final class StreamingKMeansUtilsM
    * @throws java.io.IOException
    */
   public static void writeCentroidsToSequenceFile(Iterable<Centroid> centroids, Path path, Configuration conf)
-      throws IOException {
+    throws IOException {
     SequenceFile.Writer writer = null;
     try {
       writer = SequenceFile.createWriter(FileSystem.get(conf), conf,
@@ -131,7 +131,7 @@ public final class StreamingKMeansUtilsM
   }
 
   public static void writeVectorsToSequenceFile(Iterable<? extends Vector> datapoints, Path path, Configuration conf)
-      throws IOException {
+    throws IOException {
     SequenceFile.Writer writer = null;
     try {
       writer = SequenceFile.createWriter(FileSystem.get(conf), conf,

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/topdown/postprocessor/ClusterCountReader.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/topdown/postprocessor/ClusterCountReader.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/topdown/postprocessor/ClusterCountReader.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/topdown/postprocessor/ClusterCountReader.java Wed Jun 12 20:44:19 2013
@@ -71,7 +71,8 @@ public final class ClusterCountReader {
    * @param conf              The hadoop configuration.
    * @return An ArrayList containing the final cluster ids.
    */
-  public static Map<Integer, Integer> getClusterIDs(Path clusterOutputPath, Configuration conf, boolean keyIsClusterId) throws IOException {
+  public static Map<Integer, Integer> getClusterIDs(Path clusterOutputPath, Configuration conf, boolean keyIsClusterId)
+    throws IOException {
     Map<Integer, Integer> clusterIds = new HashMap<Integer, Integer>();
     FileSystem fileSystem = clusterOutputPath.getFileSystem(conf);
     FileStatus[] clusterFiles = fileSystem.listStatus(clusterOutputPath, PathFilters.finalPartFilter());
@@ -85,7 +86,7 @@ public final class ClusterCountReader {
     int i = 0;
     while (it.hasNext()) {
       Integer key, value;
-      if (keyIsClusterId == true) { // key is the cluster id, value is i, the index we will use
+      if (keyIsClusterId) { // key is the cluster id, value is i, the index we will use
         key = it.next().getValue().getId();
         value = i;
       } else {

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/topdown/postprocessor/ClusterOutputPostProcessor.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/topdown/postprocessor/ClusterOutputPostProcessor.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/topdown/postprocessor/ClusterOutputPostProcessor.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/topdown/postprocessor/ClusterOutputPostProcessor.java Wed Jun 12 20:44:19 2013
@@ -74,12 +74,8 @@ public final class ClusterOutputPostProc
   public void process() throws IOException {
     createPostProcessDirectory();
     for (Pair<?, WeightedVectorWritable> record
-            : new SequenceFileDirIterable<Writable, WeightedVectorWritable>(clusteredPoints,
-            PathType.GLOB,
-            PathFilters.partFilter(),
-            null,
-            false,
-            conf)) {
+        : new SequenceFileDirIterable<Writable, WeightedVectorWritable>(clusteredPoints, PathType.GLOB, PathFilters.partFilter(),
+                                                                        null, false, conf)) {
       String clusterId = record.getFirst().toString().trim();
       putVectorInRespectiveCluster(clusterId, record.getSecond());
     }

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/topdown/postprocessor/ClusterOutputPostProcessorDriver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/topdown/postprocessor/ClusterOutputPostProcessorDriver.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/topdown/postprocessor/ClusterOutputPostProcessorDriver.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/topdown/postprocessor/ClusterOutputPostProcessorDriver.java Wed Jun 12 20:44:19 2013
@@ -62,9 +62,6 @@ public final class ClusterOutputPostProc
     Path input = getInputPath();
     Path output = getOutputPath();
 
-    if (getConf() == null) {
-      setConf(new Configuration());
-    }
     if (hasOption(DefaultOptionCreator.OVERWRITE_OPTION)) {
       HadoopUtil.delete(getConf(), output);
     }
@@ -89,10 +86,10 @@ public final class ClusterOutputPostProc
    * Post processes the output of clustering algorithms and groups them into respective clusters. Each
    * cluster's vectors are written into a directory named after its clusterId.
    *
-   * @param input         The output path provided to the clustering algorithm, whose would be post processed. Hint : The
+   * @param input         The output path provided to the clustering algorithm, which would be post processed. Hint: The
    *                      path of the directory containing clusters-*-final and clusteredPoints.
    * @param output        The post processed data would be stored at this path.
-   * @param runSequential If set to true, post processes it sequentially, else, uses. MapReduce. Hint : If the clustering
+   * @param runSequential If set to true, post processes it sequentially, else uses MapReduce. Hint: If the clustering
    *                      was done sequentially, make it sequential, else vice versa.
    */
   public static void run(Path input, Path output, boolean runSequential) throws IOException,
@@ -134,7 +131,8 @@ public final class ClusterOutputPostProc
   private static void postProcessMR(Configuration conf, Path input, Path output) throws IOException,
           InterruptedException,
           ClassNotFoundException {
-    System.out.println("WARNING: If you are running in Hadoop local mode, please use the --sequential option, as the MapReduce option will not work properly");
+    System.out.println("WARNING: If you are running in Hadoop local mode, please use the --sequential option, "
+        + "as the MapReduce option will not work properly");
     int numberOfClusters = ClusterCountReader.getNumberOfClusters(input, conf);
     conf.set("clusterOutputPath", input.toString());
     Job job = new Job(conf, "ClusterOutputPostProcessor Driver running over input: " + input);

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/topdown/postprocessor/ClusterOutputPostProcessorMapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/topdown/postprocessor/ClusterOutputPostProcessorMapper.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/topdown/postprocessor/ClusterOutputPostProcessorMapper.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/topdown/postprocessor/ClusterOutputPostProcessorMapper.java Wed Jun 12 20:44:19 2013
@@ -48,9 +48,10 @@ public class ClusterOutputPostProcessorM
   }
 
   @Override
-  public void map(IntWritable key, WeightedVectorWritable val, Context context) throws IOException, InterruptedException {
-    //by pivoting on the cluster mapping value, we can make sure that each unique cluster goes to it's own reducer, since they
-    //are numbered from 0 to k-1, where k is the number of clusters
+  public void map(IntWritable key, WeightedVectorWritable val, Context context)
+    throws IOException, InterruptedException {
+    // by pivoting on the cluster mapping value, we can make sure that each unique cluster goes to its own reducer,
+    // since they are numbered from 0 to k-1, where k is the number of clusters
     outputVector.set(val.getVector());
     context.write(new IntWritable(newClusterMappings.get(key.get())), outputVector);
   }

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/topdown/postprocessor/ClusterOutputPostProcessorReducer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/topdown/postprocessor/ClusterOutputPostProcessorReducer.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/topdown/postprocessor/ClusterOutputPostProcessorReducer.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/topdown/postprocessor/ClusterOutputPostProcessorReducer.java Wed Jun 12 20:44:19 2013
@@ -29,8 +29,8 @@ import java.util.Map;
 /**
  * Reducer for post processing cluster output.
  */
-public class ClusterOutputPostProcessorReducer extends Reducer<IntWritable, VectorWritable, IntWritable, VectorWritable> {
-
+public class ClusterOutputPostProcessorReducer
+    extends Reducer<IntWritable, VectorWritable, IntWritable, VectorWritable> {
 
   private Map<Integer, Integer> reverseClusterMappings;
 

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/common/AbstractJob.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/common/AbstractJob.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/common/AbstractJob.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/common/AbstractJob.java Wed Jun 12 20:44:19 2013
@@ -114,7 +114,7 @@ public abstract class AbstractJob extend
   private Group group;
 
   protected AbstractJob() {
-    options = new LinkedList<Option>();
+    options = Lists.newLinkedList();;
   }
 
   /** Returns the input path established by a call to {@link #parseArguments(String[])}.
@@ -415,19 +415,19 @@ public abstract class AbstractJob extend
     return res;
   }
 
-  public int getInt(String optionName){
+  public int getInt(String optionName) {
     return Integer.parseInt(getOption(optionName));
   }
 
-  public int getInt(String optionName, int defaultVal){
+  public int getInt(String optionName, int defaultVal) {
     return Integer.parseInt(getOption(optionName, String.valueOf(defaultVal)));
   }
 
-  public float getFloat(String optionName){
+  public float getFloat(String optionName) {
     return Float.parseFloat(getOption(optionName));
   }
 
-  public float getFloat(String optionName, float defaultVal){
+  public float getFloat(String optionName, float defaultVal) {
     return Float.parseFloat(getOption(optionName, String.valueOf(defaultVal)));
   }
 

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/common/TimingStatistics.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/common/TimingStatistics.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/common/TimingStatistics.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/common/TimingStatistics.java Wed Jun 12 20:44:19 2013
@@ -90,11 +90,8 @@ public final class TimingStatistics impl
         + "stdDev = " + DF.format(getStdDevTime() / 1000.0) + "us;";
   }
 
-  public Call newCall() {
-    return new Call();
-  }
-
-  /** Ignores counting the performance metrics until leadTimeIsFinished The caller should enough time for the JIT to warm up. */
+  /** Ignores counting the performance metrics until leadTimeIsFinished. The caller should allow enough time
+   *  for the JIT to warm up. */
   public Call newCall(long leadTimeUsec) {
     if (leadSumTime > leadTimeUsec) {
       return new Call();
@@ -104,7 +101,7 @@ public final class TimingStatistics impl
   }
 
   /** Ignores counting the performance metrics. The caller should enough time for the JIT to warm up. */
-  public class LeadTimeCall extends Call {
+  public final class LeadTimeCall extends Call {
 
     private LeadTimeCall() { }
 

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/common/distance/DistanceMeasure.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/common/distance/DistanceMeasure.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/common/distance/DistanceMeasure.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/common/distance/DistanceMeasure.java Wed Jun 12 20:44:19 2013
@@ -35,7 +35,7 @@ public interface DistanceMeasure extends
   double distance(Vector v1, Vector v2);
   
   /**
-   * Optimized version of distance metric for sparse vectors. This distance computation requires operations
+   * Optimized version of distance metric for sparse vectors. This distance computation requires operations
    * proportional to the number of non-zero elements in the vector instead of the cardinality of the vector.
    * 
    * @param centroidLengthSquare

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/common/iterator/FileLineIterator.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/common/iterator/FileLineIterator.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/common/iterator/FileLineIterator.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/common/iterator/FileLineIterator.java Wed Jun 12 20:44:19 2013
@@ -32,6 +32,8 @@ import com.google.common.base.Charsets;
 import com.google.common.collect.AbstractIterator;
 import com.google.common.io.Closeables;
 import org.apache.mahout.cf.taste.impl.common.SkippingIterator;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 /**
  * Iterates over the lines of a text file. This assumes the text file's lines are delimited in a manner
@@ -43,14 +45,17 @@ public final class FileLineIterator exte
 
   private final BufferedReader reader;
 
-  /**
-   * Creates a  over a given file, assuming a UTF-8 encoding.
-   * 
-   * @throws java.io.FileNotFoundException
-   *           if the file does not exist
-   * @throws IOException
-   *           if the file cannot be read
-   */
+  private static final Logger log = LoggerFactory.getLogger(FileLineIterator.class);
+
+      /**
+       * Creates a {@link FileLineIterator} over a given file, assuming a UTF-8 encoding.
+       *
+       * @throws java.io.FileNotFoundException
+       *           if the file does not exist
+       * @throws IOException
+       *           if the file cannot be read
+       */
+
   public FileLineIterator(File file) throws IOException {
     this(file, Charsets.UTF_8, false);
   }
@@ -115,7 +120,7 @@ public final class FileLineIterator exte
       try {
         close();
       } catch (IOException e) {
-        //we are throwing here anyway, so do nothing
+        log.error(e.getMessage(), e);
       }
       throw new IllegalStateException(ioe);
     }

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/common/iterator/sequencefile/SequenceFileIterator.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/common/iterator/sequencefile/SequenceFileIterator.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/common/iterator/sequencefile/SequenceFileIterator.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/common/iterator/sequencefile/SequenceFileIterator.java Wed Jun 12 20:44:19 2013
@@ -30,6 +30,8 @@ import org.apache.hadoop.io.SequenceFile
 import org.apache.hadoop.io.Writable;
 import org.apache.hadoop.util.ReflectionUtils;
 import org.apache.mahout.common.Pair;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 /**
  * <p>{@link java.util.Iterator} over a {@link SequenceFile}'s keys and values, as a {@link Pair}
@@ -47,9 +49,12 @@ public final class SequenceFileIterator<
   private V value;
   private final boolean reuseKeyValueInstances;
 
-  /**
-   * @throws IOException if path can't be read, or its key or value class can't be instantiated
-   */
+  private static final Logger log = LoggerFactory.getLogger(SequenceFileIterator.class);
+
+      /**
+       * @throws IOException if path can't be read, or its key or value class can't be instantiated
+       */
+
   public SequenceFileIterator(Path path, boolean reuseKeyValueInstances, Configuration conf) throws IOException {
     key = null;
     value = null;
@@ -104,7 +109,7 @@ public final class SequenceFileIterator<
       try {
         close();
       } catch (IOException e) {
-        //throwing next anyway
+        log.error(e.getMessage(), e);
       }
       throw new IllegalStateException(ioe);
     }

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/common/iterator/sequencefile/SequenceFileValueIterator.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/common/iterator/sequencefile/SequenceFileValueIterator.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/common/iterator/sequencefile/SequenceFileValueIterator.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/common/iterator/sequencefile/SequenceFileValueIterator.java Wed Jun 12 20:44:19 2013
@@ -28,6 +28,8 @@ import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.io.SequenceFile;
 import org.apache.hadoop.io.Writable;
 import org.apache.hadoop.util.ReflectionUtils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 /**
  * <p>{@link java.util.Iterator} over a {@link SequenceFile}'s values only.</p>
@@ -41,9 +43,12 @@ public final class SequenceFileValueIter
   private V value;
   private final boolean reuseKeyValueInstances;
 
-  /**
-   * @throws IOException if path can't be read, or its key or value class can't be instantiated
-   */
+  private static final Logger log = LoggerFactory.getLogger(SequenceFileValueIterator.class);
+
+      /**
+       * @throws IOException if path can't be read, or its key or value class can't be instantiated
+       */
+
   public SequenceFileValueIterator(Path path, boolean reuseKeyValueInstances, Configuration conf) throws IOException {
     value = null;
     FileSystem fs = path.getFileSystem(conf);
@@ -83,7 +88,7 @@ public final class SequenceFileValueIter
       try {
         close();
       } catch (IOException e) {
-        //throw the original exception next
+        log.error(e.getMessage(), e);
       }
       throw new IllegalStateException(ioe);
     }

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/ep/EvolutionaryProcess.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/ep/EvolutionaryProcess.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/ep/EvolutionaryProcess.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/ep/EvolutionaryProcess.java Wed Jun 12 20:44:19 2013
@@ -66,7 +66,7 @@ import java.util.concurrent.Future;
  * @param <T> The payload class.
  */
 public class EvolutionaryProcess<T extends Payload<U>, U> implements Writable, Closeable {
-  // used to execute operations on the population in thread parallel.
+  // used to execute operations on the population in thread parallel.
   private ExecutorService pool;
 
   // threadCount is serialized so that we can reconstruct the thread pool

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/ep/State.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/ep/State.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/ep/State.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/ep/State.java Wed Jun 12 20:44:19 2013
@@ -34,7 +34,7 @@ import java.util.concurrent.atomic.Atomi
 /**
  * Records evolutionary state and provides a mutation operation for recorded-step meta-mutation.
  *
- * You provide the payload, this class provides the mutation operations.  During mutation,
+ * You provide the payload, this class provides the mutation operations.  During mutation,
  * the payload is copied and after the state variables are changed, they are passed to the
  * payload.
  *

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/PFPGrowth.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/PFPGrowth.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/PFPGrowth.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/PFPGrowth.java Wed Jun 12 20:44:19 2013
@@ -221,7 +221,7 @@ public final class PFPGrowth {
     int numGroups = params.getInt(NUM_GROUPS, NUM_GROUPS_DEFAULT);
     int maxPerGroup = fList.size() / numGroups;
     if (fList.size() % numGroups != 0) {
-        maxPerGroup++;
+      maxPerGroup++;
     }
     params.set(MAX_PER_GROUP, Integer.toString(maxPerGroup));
 

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/ParallelCountingMapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/ParallelCountingMapper.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/ParallelCountingMapper.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/ParallelCountingMapper.java Wed Jun 12 20:44:19 2013
@@ -23,6 +23,7 @@ import java.util.HashSet;
 import java.util.Set;
 import java.util.regex.Pattern;
 
+import com.google.common.collect.Sets;
 import org.apache.hadoop.io.LongWritable;
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.mapreduce.Mapper;
@@ -46,7 +47,7 @@ public class ParallelCountingMapper exte
                                                                       InterruptedException {
     
     String[] items = splitter.split(input.toString());
-    Set<String> uniqueItems = new HashSet<String>(Arrays.asList(items));
+    Set<String> uniqueItems = Sets.newHashSet(Arrays.asList(items));
     for (String item : uniqueItems) {
       if (item.trim().isEmpty()) {
         continue;

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/DistributedRowMatrix.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/DistributedRowMatrix.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/DistributedRowMatrix.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/DistributedRowMatrix.java Wed Jun 12 20:44:19 2013
@@ -47,7 +47,7 @@ import com.google.common.collect.Iterato
 
 /**
  * DistributedRowMatrix is a FileSystem-backed VectorIterable in which the vectors live in a
- * SequenceFile<WritableComparable,VectorWritable>, and distributed operations are executed as M/R passes on
+ * SequenceFile<WritableComparable,VectorWritable>, and distributed operations are executed as M/R passes on
  * Hadoop.  The usage is as follows: <p>
  * <p>
  * <pre>

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/VectorDistanceSimilarityJob.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/VectorDistanceSimilarityJob.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/VectorDistanceSimilarityJob.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/VectorDistanceSimilarityJob.java Wed Jun 12 20:44:19 2013
@@ -85,9 +85,6 @@ public class VectorDistanceSimilarityJob
       HadoopUtil.delete(getConf(), output);
     }
     DistanceMeasure measure = ClassUtils.instantiateAs(measureClass, DistanceMeasure.class);
-    if (getConf() == null) {
-      setConf(new Configuration());
-    }
     String outType = getOption(OUT_TYPE_KEY, "pw");
     
     Double maxDistance = null;

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/ABtDenseOutJob.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/ABtDenseOutJob.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/ABtDenseOutJob.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/ABtDenseOutJob.java Wed Jun 12 20:44:19 2013
@@ -27,6 +27,7 @@ import java.util.Iterator;
 import java.util.LinkedList;
 import java.util.regex.Matcher;
 
+import com.google.common.collect.Lists;
 import org.apache.commons.lang3.Validate;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.filecache.DistributedCache;
@@ -252,7 +253,7 @@ public final class ABtDenseOutJob {
                * conditions will never kick in. Or, the only situation where we
                * can't fit Y_i block in memory is when A input is much sparser
                * than k+p per row. But if this is the case, then we'd be looking
-               * at very few elements without engaging them in any operations so
+               * at very few elements without engaging them in any operations so
                * even then it should be ok.
                */
               if (j < aRowBegin) {
@@ -364,7 +365,7 @@ public final class ABtDenseOutJob {
       NUMBER_FORMAT.setGroupingUsed(false);
     }
 
-    private final Deque<Closeable> closeables = new LinkedList<Closeable>();
+    private final Deque<Closeable> closeables = Lists.newLinkedList();
 
     protected int blockHeight;
 

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/ABtJob.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/ABtJob.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/ABtJob.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/ABtJob.java Wed Jun 12 20:44:19 2013
@@ -26,6 +26,7 @@ import java.util.Deque;
 import java.util.LinkedList;
 import java.util.regex.Matcher;
 
+import com.google.common.collect.Lists;
 import org.apache.commons.lang3.Validate;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.filecache.DistributedCache;
@@ -293,7 +294,7 @@ public final class ABtJob {
       NUMBER_FORMAT.setGroupingUsed(false);
     }
 
-    private final Deque<Closeable> closeables = new LinkedList<Closeable>();
+    private final Deque<Closeable> closeables = Lists.newLinkedList();
     protected final SparseRowBlockWritable accum = new SparseRowBlockWritable();
 
     protected int blockHeight;

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/QJob.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/QJob.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/QJob.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/QJob.java Wed Jun 12 20:44:19 2013
@@ -22,6 +22,7 @@ import java.io.IOException;
 import java.util.Deque;
 import java.util.LinkedList;
 
+import com.google.common.collect.Lists;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.io.SequenceFile.CompressionType;
@@ -76,7 +77,7 @@ public final class QJob {
       Mapper<Writable, VectorWritable, SplitPartitionedWritable, VectorWritable> {
 
     private MultipleOutputs outputs;
-    private final Deque<Closeable> closeables = new LinkedList<Closeable>();
+    private final Deque<Closeable> closeables = Lists.newLinkedList();
     private SplitPartitionedWritable qHatKey;
     private SplitPartitionedWritable rHatKey;
     private Vector yRow;

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/YtYJob.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/YtYJob.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/YtYJob.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/YtYJob.java Wed Jun 12 20:44:19 2013
@@ -62,7 +62,7 @@ public final class YtYJob {
      * we keep yRow in a dense form here but keep an eye not to dense up while
      * doing YtY products. I am not sure that sparse vector would create much
      * performance benefits since we must to assume that y would be more often
-     * dense than sparse, so for bulk dense operations that would perform
+     * dense than sparse, so for bulk dense operations that would perform
      * somewhat better than a RandomAccessSparse vector frequent updates.
      */
     private Vector yRow;

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/qr/GivensThinSolver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/qr/GivensThinSolver.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/qr/GivensThinSolver.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/qr/GivensThinSolver.java Wed Jun 12 20:44:19 2013
@@ -30,8 +30,8 @@ import org.apache.mahout.math.Vector;
 import org.apache.mahout.math.hadoop.stochasticsvd.UpperTriangular;
 
 /**
- * Givens Thin solver. Standard Givens operations are reordered in a way that
- * helps us to push them thru MapReduce operations in a block fashion.
+ * Givens Thin solver. Standard Givens operations are reordered in a way that
+ * helps us to push them thru MapReduce operations in a block fashion.
  */
 public class GivensThinSolver {
 

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/neighborhood/BruteSearch.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/neighborhood/BruteSearch.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/neighborhood/BruteSearch.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/neighborhood/BruteSearch.java Wed Jun 12 20:44:19 2013
@@ -71,7 +71,8 @@ public class BruteSearch extends Updatab
     limit = Math.min(limit, referenceVectors.size());
     // A priority queue of the best @limit elements, ordered from worst to best so that the worst
     // element is always on top and can easily be removed.
-    PriorityQueue<WeightedThing<Integer>> bestNeighbors = new PriorityQueue<WeightedThing<Integer>>(limit, Ordering.natural().reverse());
+    PriorityQueue<WeightedThing<Integer>> bestNeighbors =
+        new PriorityQueue<WeightedThing<Integer>>(limit, Ordering.natural().reverse());
     // The resulting list of weighted WeightedVectors (the weight is the distance from the query).
     List<WeightedThing<Vector>> results =
         Lists.newArrayListWithCapacity(limit);