You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by ss...@apache.org on 2013/06/12 22:44:21 UTC

svn commit: r1492416 [2/3] - in /mahout/trunk: ./ core/src/main/java/org/apache/mahout/cf/taste/hadoop/ core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/ core/src/main/java/org/apache/mahout/classifier/ core/src/main/java/org/apache/mahout/clas...

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/neighborhood/HashedVector.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/neighborhood/HashedVector.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/neighborhood/HashedVector.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/neighborhood/HashedVector.java Wed Jun 12 20:44:19 2013
@@ -29,7 +29,7 @@ import org.apache.mahout.math.WeightedVe
  * http://www.cs.princeton.edu/courses/archive/spring04/cos598B/bib/CharikarEstim.pdf
  */
 public class HashedVector extends WeightedVector {
-  protected static int INVALID_INDEX = -1;
+  protected static final int INVALID_INDEX = -1;
 
   /**
    * Value of the locality sensitive hash. It is 64 bit.

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/neighborhood/ProjectionSearch.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/neighborhood/ProjectionSearch.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/neighborhood/ProjectionSearch.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/neighborhood/ProjectionSearch.java Wed Jun 12 20:44:19 2013
@@ -100,12 +100,12 @@ public class ProjectionSearch extends Up
     }
     int numVectors = scalarProjections.get(0).size();
     for (TreeMultiset<WeightedThing<Vector>> s : scalarProjections) {
-      Preconditions.checkArgument(s.size() == numVectors, "Number of vectors in projection sets " +
-          "differ");
+      Preconditions.checkArgument(s.size() == numVectors, "Number of vectors in projection sets "
+          + "differ");
       double firstWeight = s.firstEntry().getElement().getWeight();
       for (WeightedThing<Vector> w : s) {
-        Preconditions.checkArgument(firstWeight <= w.getWeight(), "Weights not in non-decreasing " +
-            "order");
+        Preconditions.checkArgument(firstWeight <= w.getWeight(), "Weights not in non-decreasing "
+            + "order");
         firstWeight = w.getWeight();
       }
     }

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/neighborhood/Searcher.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/neighborhood/Searcher.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/neighborhood/Searcher.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/neighborhood/Searcher.java Wed Jun 12 20:44:19 2013
@@ -42,7 +42,7 @@ public abstract class Searcher implement
     this.distanceMeasure = distanceMeasure;
   }
 
-  public DistanceMeasure getDistanceMeasure(){
+  public DistanceMeasure getDistanceMeasure() {
     return distanceMeasure;
   }
 

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/random/RandomProjector.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/random/RandomProjector.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/random/RandomProjector.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/random/RandomProjector.java Wed Jun 12 20:44:19 2013
@@ -81,7 +81,7 @@ public final class RandomProjector {
   public static Matrix generateBasisZeroPlusMinusOne(int projectedVectorSize, int vectorSize) {
     Matrix basisMatrix = new DenseMatrix(projectedVectorSize, vectorSize);
     Multinomial<Double> choice = new Multinomial<Double>();
-    choice.add(0.0, 2/3.0);
+    choice.add(0.0, 2 / 3.0);
     choice.add(Math.sqrt(3.0), 1 / 6.0);
     choice.add(-Math.sqrt(3.0), 1 / 6.0);
     for (int i = 0; i < projectedVectorSize; ++i) {

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/DictionaryVectorizer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/DictionaryVectorizer.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/DictionaryVectorizer.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/DictionaryVectorizer.java Wed Jun 12 20:44:19 2013
@@ -66,7 +66,7 @@ import org.slf4j.LoggerFactory;
  * This is a dictionary based Vectorizer.
  */
 public final class DictionaryVectorizer extends AbstractJob implements Vectorizer {
-  private static Logger log = LoggerFactory.getLogger(DictionaryVectorizer.class);
+  private static final Logger log = LoggerFactory.getLogger(DictionaryVectorizer.class);
   
   public static final String DOCUMENT_VECTOR_OUTPUT_FOLDER = "tf-vectors";
   public static final String MIN_SUPPORT = "min.support";
@@ -377,14 +377,18 @@ public final class DictionaryVectorizer 
     addOption("minSupport", "s", "(Optional) Minimum Support. Default Value: 2", "2");
     addOption("maxNGramSize", "ng", "(Optional) The maximum size of ngrams to create"
                             + " (2 = bigrams, 3 = trigrams, etc) Default Value:1");
-    addOption("minLLR", "ml", "(Optional)The minimum Log Likelihood Ratio(Float)  Default is " + LLRReducer.DEFAULT_MIN_LLR);
-    addOption("norm", "n", "The norm to use, expressed as either a float or \"INF\" if you want to use the Infinite norm.  "
+    addOption("minLLR", "ml", "(Optional)The minimum Log Likelihood Ratio(Float)  Default is "
+        + LLRReducer.DEFAULT_MIN_LLR);
+    addOption("norm", "n", "The norm to use, expressed as either a float or \"INF\" "
+        + "if you want to use the Infinite norm.  "
                     + "Must be greater or equal to 0.  The default is not to normalize");
-    addOption("logNormalize", "lnorm", "(Optional) Whether output vectors should be logNormalize. If set true else false", "false");
+    addOption("logNormalize", "lnorm", "(Optional) Whether output vectors should be logNormalize. "
+        + "If set true else false", "false");
     addOption(DefaultOptionCreator.numReducersOption().create());
     addOption("chunkSize", "chunk", "The chunkSize in MegaBytes. 100-10000 MB", "100");
     addOption(DefaultOptionCreator.methodOption().create());
-    addOption("namedVector", "nv", "(Optional) Whether output vectors should be NamedVectors. If set true else false", "false");
+    addOption("namedVector", "nv", "(Optional) Whether output vectors should be NamedVectors. "
+        + "If set true else false", "false");
     if (parseArguments(args) == null) {
       return -1;
     }

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/SparseVectorsFromSequenceFiles.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/SparseVectorsFromSequenceFiles.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/SparseVectorsFromSequenceFiles.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/SparseVectorsFromSequenceFiles.java Wed Jun 12 20:44:19 2013
@@ -96,8 +96,9 @@ public final class SparseVectorsFromSequ
             abuilder.withName("maxDFSigma").withMinimum(1).withMaximum(1).create()).withDescription(
             "What portion of the tf (tf-idf) vectors to be used, expressed in times the standard deviation (sigma) "
                     + "of the document frequencies of these vectors. Can be used to remove really high frequency terms."
-                    + " Expressed as a double value. Good value to be specified is 3.0. In case the value is less than 0 "
-                    + "no vectors will be filtered out. Default is -1.0.  Overrides maxDFPercent").withShortName("xs").create();
+                    + " Expressed as a double value. Good value to be specified is 3.0. In case the value is less "
+                    + "than 0 no vectors will be filtered out. Default is -1.0.  Overrides maxDFPercent")
+            .withShortName("xs").create();
 
     Option minLLROpt = obuilder.withLongName("minLLR").withRequired(false).withArgument(
             abuilder.withName("minLLR").withMinimum(1).withMaximum(1).create()).withDescription(

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/pruner/WordsPrunerReducer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/pruner/WordsPrunerReducer.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/pruner/WordsPrunerReducer.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/pruner/WordsPrunerReducer.java Wed Jun 12 20:44:19 2013
@@ -42,7 +42,7 @@ public class WordsPrunerReducer extends
 
   @Override
   protected void reduce(WritableComparable<?> key, Iterable<VectorWritable> values, Context context)
-          throws IOException, InterruptedException {
+    throws IOException, InterruptedException {
     Iterator<VectorWritable> it = values.iterator();
     if (!it.hasNext()) {
       return;

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/term/TFPartialVectorReducer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/term/TFPartialVectorReducer.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/term/TFPartialVectorReducer.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/term/TFPartialVectorReducer.java Wed Jun 12 20:44:19 2013
@@ -39,18 +39,15 @@ import org.apache.mahout.math.VectorWrit
 import org.apache.mahout.math.map.OpenObjectIntHashMap;
 import org.apache.mahout.vectorizer.DictionaryVectorizer;
 import org.apache.mahout.vectorizer.common.PartialVectorMerger;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
 
 import java.io.IOException;
-import java.util.Arrays;
 import java.util.Iterator;
 
 /**
  * Converts a document in to a sparse vector
  */
 public class TFPartialVectorReducer extends Reducer<Text, StringTuple, Text, VectorWritable> {
-  private transient static Logger log = LoggerFactory.getLogger(TFPartialVectorReducer.class);
+
   private final OpenObjectIntHashMap<String> dictionary = new OpenObjectIntHashMap<String>();
 
   private int dimension;
@@ -63,7 +60,7 @@ public class TFPartialVectorReducer exte
 
   @Override
   protected void reduce(Text key, Iterable<StringTuple> values, Context context)
-          throws IOException, InterruptedException {
+    throws IOException, InterruptedException {
     Iterator<StringTuple> it = values.iterator();
     if (!it.hasNext()) {
       return;

Modified: mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderJobTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderJobTest.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderJobTest.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderJobTest.java Wed Jun 12 20:44:19 2013
@@ -26,6 +26,7 @@ import java.util.List;
 import java.util.Map;
 import java.util.Map.Entry;
 
+import com.google.common.collect.Lists;
 import com.google.common.collect.Maps;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.io.IntWritable;
@@ -148,7 +149,7 @@ public class RecommenderJobTest extends 
 
     EasyMock.replay(context, userCounters);
 
-    Collection<VarLongWritable> varLongWritables = new LinkedList<VarLongWritable>();
+    Collection<VarLongWritable> varLongWritables = Lists.newLinkedList();
     varLongWritables.add(new EntityPrefWritable(34L, 1.0f));
     varLongWritables.add(new EntityPrefWritable(56L, 2.0f));
 
@@ -653,7 +654,7 @@ public class RecommenderJobTest extends 
       public boolean matches(Object argument) {
         if (argument instanceof RecommendedItemsWritable) {
           RecommendedItemsWritable recommendedItemsWritable = (RecommendedItemsWritable) argument;
-          List<RecommendedItem> expectedItems = new LinkedList<RecommendedItem>(Arrays.asList(items));
+          List<RecommendedItem> expectedItems = Arrays.asList(items);
           return expectedItems.equals(recommendedItemsWritable.getRecommendedItems());
         }
         return false;
@@ -913,7 +914,7 @@ public class RecommenderJobTest extends 
       String[] tokens = keyValue[1].replaceAll("\\[", "")
           .replaceAll("\\]", "").split(",");
 
-      List<RecommendedItem> items = new LinkedList<RecommendedItem>();
+      List<RecommendedItem> items = Lists.newLinkedList();
       for (String token : tokens) {
         String[] itemTokens = token.split(":");
         long itemID = Long.parseLong(itemTokens[0]);

Modified: mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/MathHelper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/MathHelper.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/MathHelper.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/MathHelper.java Wed Jun 12 20:44:19 2013
@@ -44,7 +44,7 @@ import org.easymock.IArgumentMatcher;
 import org.junit.Assert;
 
 /**
- * a collection of small helper methods useful for unit-testing mathematical operations
+ * a collection of small helper methods useful for unit-testing mathematical OPERATIONS
  */
 public final class MathHelper {
 
@@ -161,9 +161,7 @@ public final class MathHelper {
     for (Pair<IntWritable,VectorWritable> record :
         new SequenceFileIterable<IntWritable,VectorWritable>(path, true, conf)) {
       IntWritable key = record.getFirst();
-      VectorWritable value = record.getSecond();
       readOneRow = true;
-      int row = key.get();
       rows.put(key.get(), record.getSecond().get());
     }
     if (!readOneRow) {

Modified: mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/LocalSSVDPCADenseTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/LocalSSVDPCADenseTest.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/LocalSSVDPCADenseTest.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/LocalSSVDPCADenseTest.java Wed Jun 12 20:44:19 2013
@@ -24,6 +24,7 @@ import java.util.Deque;
 import java.util.LinkedList;
 import java.util.Random;
 
+import com.google.common.collect.Lists;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
@@ -63,7 +64,7 @@ public class LocalSSVDPCADenseTest exten
     // conf.set("mapred.job.tracker","localhost:11011");
     // conf.set("fs.default.name","hdfs://localhost:11010/");
 
-    Deque<Closeable> closeables = new LinkedList<Closeable>();
+    Deque<Closeable> closeables = Lists.newLinkedList();
     Random rnd = RandomUtils.getRandom();
 
     File tmpDir = getTestTempDir("svdtmp");

Modified: mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/LocalSSVDSolverSparseSequentialTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/LocalSSVDSolverSparseSequentialTest.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/LocalSSVDSolverSparseSequentialTest.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/LocalSSVDSolverSparseSequentialTest.java Wed Jun 12 20:44:19 2013
@@ -24,6 +24,7 @@ import java.util.Deque;
 import java.util.LinkedList;
 import java.util.Random;
 
+import com.google.common.collect.Lists;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
@@ -78,7 +79,7 @@ public class LocalSSVDSolverSparseSequen
     // conf.set("mapred.job.tracker","localhost:11011");
     // conf.set("fs.default.name","hdfs://localhost:11010/");
 
-    Deque<Closeable> closeables = new LinkedList<Closeable>();
+    Deque<Closeable> closeables = Lists.newLinkedList();;
     Random rnd = RandomUtils.getRandom();
 
     File tmpDir = getTestTempDir("svdtmp");

Modified: mahout/trunk/core/src/test/java/org/apache/mahout/vectorizer/EncodedVectorsFromSequenceFilesTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/vectorizer/EncodedVectorsFromSequenceFilesTest.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/vectorizer/EncodedVectorsFromSequenceFilesTest.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/vectorizer/EncodedVectorsFromSequenceFilesTest.java Wed Jun 12 20:44:19 2013
@@ -20,6 +20,7 @@ package org.apache.mahout.vectorizer;
 import java.util.LinkedList;
 import java.util.List;
 
+import com.google.common.collect.Lists;
 import com.google.common.io.Closeables;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
@@ -92,7 +93,7 @@ public class EncodedVectorsFromSequenceF
     Path tmpPath = getTestTempDirPath();
     Path outputPath = new Path(tmpPath, "output");
     
-    List<String> argList = new LinkedList<String>();
+    List<String> argList = Lists.newLinkedList();;
     argList.add("-i");
     argList.add(inputPath.toString());
     argList.add("-o");

Modified: mahout/trunk/core/src/test/java/org/apache/mahout/vectorizer/HighDFWordsPrunerTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/vectorizer/HighDFWordsPrunerTest.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/vectorizer/HighDFWordsPrunerTest.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/vectorizer/HighDFWordsPrunerTest.java Wed Jun 12 20:44:19 2013
@@ -16,6 +16,7 @@ package org.apache.mahout.vectorizer;
  * limitations under the License.
  */
 
+import com.google.common.collect.Lists;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
@@ -88,7 +89,7 @@ public class HighDFWordsPrunerTest exten
   private void runTest(boolean prune) throws Exception {
     Path outputPath = getTestTempFilePath("output");
 
-    List<String> argList = new LinkedList<String>();
+    List<String> argList = Lists.newLinkedList();
     argList.add("-i");
     argList.add(inputPath.toString());
     argList.add("-o");

Modified: mahout/trunk/core/src/test/java/org/apache/mahout/vectorizer/SparseVectorsFromSequenceFilesTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/vectorizer/SparseVectorsFromSequenceFilesTest.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/vectorizer/SparseVectorsFromSequenceFilesTest.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/vectorizer/SparseVectorsFromSequenceFilesTest.java Wed Jun 12 20:44:19 2013
@@ -21,6 +21,7 @@ import java.io.IOException;
 import java.util.LinkedList;
 import java.util.List;
 
+import com.google.common.collect.Lists;
 import com.google.common.io.Closeables;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
@@ -164,7 +165,7 @@ public class SparseVectorsFromSequenceFi
   private Path runTest(boolean tfWeighting, boolean sequential, boolean named, double maxDFSigma, int numDocs) throws Exception {
     Path outputPath = getTestTempFilePath("output");
 
-    List<String> argList = new LinkedList<String>();
+    List<String> argList = Lists.newLinkedList();
     argList.add("-i");
     argList.add(inputPath.toString());
     argList.add("-o");

Modified: mahout/trunk/core/src/test/java/org/apache/mahout/vectorizer/collocations/llr/CollocReducerTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/vectorizer/collocations/llr/CollocReducerTest.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/vectorizer/collocations/llr/CollocReducerTest.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/vectorizer/collocations/llr/CollocReducerTest.java Wed Jun 12 20:44:19 2013
@@ -21,6 +21,7 @@ import java.util.Arrays;
 import java.util.Collection;
 import java.util.LinkedList;
 
+import com.google.common.collect.Lists;
 import org.apache.hadoop.mapreduce.Reducer;
 import org.apache.hadoop.mapreduce.Reducer.Context;
 import org.apache.mahout.common.MahoutTestCase;
@@ -75,7 +76,7 @@ public final class CollocReducerTest ext
     for (Gram[] ii : input) {
       key.set(ii[0], empty);
 
-      Collection<Gram> vv = new LinkedList<Gram>();
+      Collection<Gram> vv = Lists.newLinkedList();
       vv.addAll(Arrays.asList(ii));
       c.reduce(key, vv, context);
     }

Modified: mahout/trunk/core/src/test/java/org/apache/mahout/vectorizer/collocations/llr/LLRReducerTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/vectorizer/collocations/llr/LLRReducerTest.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/vectorizer/collocations/llr/LLRReducerTest.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/vectorizer/collocations/llr/LLRReducerTest.java Wed Jun 12 20:44:19 2013
@@ -21,6 +21,7 @@ import java.util.Arrays;
 import java.util.Collection;
 import java.util.LinkedList;
 
+import com.google.common.collect.Lists;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.io.DoubleWritable;
 import org.apache.hadoop.io.Text;
@@ -106,7 +107,7 @@ public final class LLRReducerTest extend
     reducer.setup(context);
     
     for (Gram[] ii: input) {
-      Collection<Gram> vv = new LinkedList<Gram>();
+      Collection<Gram> vv = Lists.newLinkedList();
       vv.addAll(Arrays.asList(ii).subList(1, ii.length));
       reducer.reduce(ii[0], vv, context);
     }

Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/email/MailToPrefsDriver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/email/MailToPrefsDriver.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/email/MailToPrefsDriver.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/email/MailToPrefsDriver.java Wed Jun 12 20:44:19 2013
@@ -99,10 +99,6 @@ public final class MailToPrefsDriver ext
     int chunkSize = Integer.parseInt(getOption("chunkSize"));
     String separator = getOption("separator");
     Configuration conf = getConf();
-    if (conf == null) {
-      setConf(new Configuration());
-      conf = getConf();
-    }
     boolean useCounts = hasOption("useCounts");
     AtomicInteger currentPhase = new AtomicInteger();
     int[] msgDim = new int[1];

Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/DataFileIterator.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/DataFileIterator.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/DataFileIterator.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/DataFileIterator.java Wed Jun 12 20:44:19 2013
@@ -29,6 +29,8 @@ import org.apache.mahout.cf.taste.impl.m
 import org.apache.mahout.cf.taste.model.PreferenceArray;
 import org.apache.mahout.common.iterator.FileLineIterator;
 import org.apache.mahout.common.Pair;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 /**
  * <p>An {@link java.util.Iterator} which iterates over any of the KDD Cup's rating files. These include the files
@@ -49,6 +51,8 @@ public final class DataFileIterator
 
   private final FileLineIterator lineIterator;
 
+  private static final Logger log = LoggerFactory.getLogger(DataFileIterator.class);
+
   public DataFileIterator(File dataFile) throws IOException {
     if (dataFile == null || dataFile.isDirectory() || !dataFile.exists()) {
       throw new IllegalArgumentException("Bad data file: " + dataFile);
@@ -132,7 +136,7 @@ public final class DataFileIterator
     try {
       Closeables.close(lineIterator, true);
     } catch (IOException e) {
-      //nothing
+      log.error(e.getMessage(), e);
     }
   }
 

Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/PosTagger.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/PosTagger.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/PosTagger.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/PosTagger.java Wed Jun 12 20:44:19 2013
@@ -26,6 +26,7 @@ import java.util.Map;
 import java.util.regex.Pattern;
 
 import com.google.common.base.Charsets;
+import com.google.common.collect.Lists;
 import com.google.common.collect.Maps;
 import com.google.common.io.CharStreams;
 import com.google.common.io.Resources;
@@ -114,13 +115,13 @@ public final class PosTagger {
    */
   private static void readFromURL(String url, boolean assignIDs) throws IOException {
     // initialize the data structure
-    hiddenSequences = new LinkedList<int[]>();
-    observedSequences = new LinkedList<int[]>();
+    hiddenSequences = Lists.newLinkedList();
+    observedSequences = Lists.newLinkedList();
     readLines = 0;
 
     // now read line by line of the input file
-    List<Integer> observedSequence = new LinkedList<Integer>();
-    List<Integer> hiddenSequence = new LinkedList<Integer>();
+    List<Integer> observedSequence = Lists.newLinkedList();
+    List<Integer> hiddenSequence = Lists.newLinkedList();
 
     for (String line : CharStreams.readLines(Resources.newReaderSupplier(new URL(url), Charsets.UTF_8))) {
       if (line.isEmpty()) {
@@ -155,8 +156,17 @@ public final class PosTagger {
       Integer wordID = wordIDs.get(tags[0]);
       Integer tagID = tagIDs.get(tags[1]);
       // now construct the current sequence
-      observedSequence.add(wordID == null ? 0 : wordID);
-      hiddenSequence.add(tagID == null ? 0 : tagID);
+      if (wordID == null) {
+        observedSequence.add(0);
+      } else {
+        observedSequence.add(wordID);
+      }
+
+      if (tagID == null) {
+        hiddenSequence.add(0);
+      } else {
+        hiddenSequence.add(tagID);
+      }
     }
 
     // if there is still something in the pipe, register it

Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/SimpleCsvExamples.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/SimpleCsvExamples.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/SimpleCsvExamples.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/SimpleCsvExamples.java Wed Jun 12 20:44:19 2013
@@ -30,6 +30,8 @@ import org.apache.mahout.math.list.IntAr
 import org.apache.mahout.math.stats.OnlineSummarizer;
 import org.apache.mahout.vectorizer.encoders.ConstantValueEncoder;
 import org.apache.mahout.vectorizer.encoders.FeatureVectorEncoder;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 import java.io.BufferedReader;
 import java.io.Closeable;
@@ -64,8 +66,9 @@ public final class SimpleCsvExamples {
   public static final char SEPARATOR_CHAR = '\t';
   private static final int FIELDS = 100;
 
-  private SimpleCsvExamples() {
-  }
+  private static final Logger log = LoggerFactory.getLogger(SimpleCsvExamples.class);
+
+  private SimpleCsvExamples() {}
 
   public static void main(String[] args) throws IOException {
     FeatureVectorEncoder[] encoder = new FeatureVectorEncoder[FIELDS];
@@ -280,7 +283,7 @@ public final class SimpleCsvExamples {
       try {
         Closeables.close(in, true);
       } catch (IOException e) {
-        //nothing
+        log.error(e.getMessage(), e);
       }
     }
   }

Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayKMeans.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayKMeans.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayKMeans.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayKMeans.java Wed Jun 12 20:44:19 2013
@@ -89,8 +89,8 @@ public class DisplayKMeans extends Displ
   }
   
   private static void runSequentialKMeansClusterer(Configuration conf, Path samples, Path output,
-      DistanceMeasure measure, int numClusters, int maxIterations, double convergenceDelta) throws IOException, InterruptedException,
-      ClassNotFoundException {
+    DistanceMeasure measure, int numClusters, int maxIterations, double convergenceDelta)
+    throws IOException, InterruptedException, ClassNotFoundException {
     Path clustersIn = new Path(output, "random-seeds");
     RandomSeedGenerator.buildRandom(conf, samples, clustersIn, numClusters, measure);
     KMeansDriver.run(samples, clustersIn, output, measure, convergenceDelta, maxIterations, true, 0.0, true);

Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/streaming/tools/ClusterQualitySummarizer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/streaming/tools/ClusterQualitySummarizer.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/streaming/tools/ClusterQualitySummarizer.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/streaming/tools/ClusterQualitySummarizer.java Wed Jun 12 20:44:19 2013
@@ -141,8 +141,7 @@ public class ClusterQualitySummarizer {
           ClusteringUtils.summarizeClusterDistances(datapoints, centroids, distanceMeasure);
       List<OnlineSummarizer> compareSummaries = null;
       if (centroidsCompare != null) {
-            compareSummaries =
-                ClusteringUtils.summarizeClusterDistances(datapoints, centroidsCompare, distanceMeasure);
+        compareSummaries = ClusteringUtils.summarizeClusterDistances(datapoints, centroidsCompare, distanceMeasure);
       }
       System.out.printf("[Dunn Index] First: %f", ClusteringUtils.dunnIndex(centroids, distanceMeasure, summaries));
       if (compareSummaries != null) {
@@ -198,8 +197,8 @@ public class ClusterQualitySummarizer {
         .withShortName("cc")
         .withRequired(false)
         .withArgument(argumentBuilder.withName("centroidsCompare").withMaximum(1).create())
-        .withDescription("where to get seq files with the second set of centroids (from Mahout KMeans or " +
-            "StreamingKMeansDriver)")
+        .withDescription("where to get seq files with the second set of centroids (from Mahout KMeans or "
+            + "StreamingKMeansDriver)")
         .create();
 
     Option outputFileOption = builder.withLongName("output")

Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/streaming/tools/IOUtils.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/streaming/tools/IOUtils.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/streaming/tools/IOUtils.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/streaming/tools/IOUtils.java Wed Jun 12 20:44:19 2013
@@ -10,6 +10,9 @@ import org.apache.mahout.math.Vector;
 import org.apache.mahout.math.VectorWritable;
 
 public class IOUtils {
+
+  private IOUtils() {}
+
   /**
    * Converts CentroidWritable values in a sequence file into Centroids lazily.
    * @param dirIterable the source iterable (comes from a SequenceFileDirIterable).

Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/streaming/tools/ResplitSequenceFiles.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/streaming/tools/ResplitSequenceFiles.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/streaming/tools/ResplitSequenceFiles.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/streaming/tools/ResplitSequenceFiles.java Wed Jun 12 20:44:19 2013
@@ -25,6 +25,7 @@ import org.apache.mahout.common.iterator
 import org.apache.mahout.common.iterator.sequencefile.SequenceFileDirIterable;
 
 public class ResplitSequenceFiles {
+
   private String inputFile;
   private String outputFileBase;
   private int numSplits;
@@ -32,6 +33,8 @@ public class ResplitSequenceFiles {
   private Configuration conf;
   private FileSystem fs;
 
+  private ResplitSequenceFiles() {}
+
   private void writeSplit(Iterator<Pair<Writable, Writable>> inputIterator,
                           int numSplit, int numEntriesPerSplit) throws IOException {
     SequenceFile.Writer splitWriter = null;
@@ -85,8 +88,8 @@ public class ResplitSequenceFiles {
         .withShortName("o")
         .withRequired(true)
         .withArgument(argumentBuilder.withName("output").withMaximum(1).create())
-        .withDescription("the base name of the file split that the files will be split it; the i'th split has the " +
-            "suffix -i")
+        .withDescription("the base name of the file split that the files will be split it; the i'th split has the "
+            + "suffix -i")
         .create();
 
     Option numSplitsOption = builder.withLongName("numSplits")

Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/benchmark/VectorBenchmarks.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/benchmark/VectorBenchmarks.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/benchmark/VectorBenchmarks.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/benchmark/VectorBenchmarks.java Wed Jun 12 20:44:19 2013
@@ -319,7 +319,8 @@ public class VectorBenchmarks {
         .withLongName("numClusters")
         .withRequired(false)
         .withArgument(abuilder.withName("nc").withDefault(0).create())
-        .withDescription("Number of clusters to create. Set to non zero to run cluster benchmark. Default: 0").withShortName("nc").create();
+        .withDescription("Number of clusters to create. Set to non zero to run cluster benchmark. Default: 0")
+        .withShortName("nc").create();
     Option numOpsOpt = obuilder
         .withLongName("numOps")
         .withRequired(false)

Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/cf/taste/impl/model/hbase/HBaseDataModel.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/cf/taste/impl/model/hbase/HBaseDataModel.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/cf/taste/impl/model/hbase/HBaseDataModel.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/cf/taste/impl/model/hbase/HBaseDataModel.java Wed Jun 12 20:44:19 2013
@@ -458,7 +458,7 @@ public final class HBaseDataModel implem
     Scan scan = new Scan(new byte[]{0x69}, new byte[]{0x70});
     scan.setFilter(new FilterList(FilterList.Operator.MUST_PASS_ALL, new KeyOnlyFilter(), new FirstKeyOnlyFilter()));
     ResultScanner scanner = table.getScanner(scan);
-    Collection<Long> ids = new LinkedList<Long>();
+    Collection<Long> ids = Lists.newLinkedList();
     for (Result result : scanner) {
       ids.add(bytesToUserOrItemID(result.getRow()));
     }
@@ -483,7 +483,7 @@ public final class HBaseDataModel implem
     Scan scan = new Scan(new byte[]{0x75}, new byte[]{0x76});
     scan.setFilter(new FilterList(FilterList.Operator.MUST_PASS_ALL, new KeyOnlyFilter(), new FirstKeyOnlyFilter()));
     ResultScanner scanner = table.getScanner(scan);
-    Collection<Long> ids = new LinkedList<Long>();
+    Collection<Long> ids = Lists.newLinkedList();
     for (Result result : scanner) {
       ids.add(bytesToUserOrItemID(result.getRow()));
     }

Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/cf/taste/impl/recommender/slopeone/jdbc/MySQLJDBCDiffStorage.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/cf/taste/impl/recommender/slopeone/jdbc/MySQLJDBCDiffStorage.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/cf/taste/impl/recommender/slopeone/jdbc/MySQLJDBCDiffStorage.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/cf/taste/impl/recommender/slopeone/jdbc/MySQLJDBCDiffStorage.java Wed Jun 12 20:44:19 2013
@@ -25,7 +25,7 @@ import org.apache.mahout.cf.taste.impl.m
  * MySQL-specific implementation. Should be used in conjunction with a
  * {@link org.apache.mahout.cf.taste.impl.model.jdbc.MySQLJDBCDataModel}. This
  * implementation stores item-item diffs in a MySQL database and encapsulates some other slope-one-specific
- * operations that are needed on the preference data in the database. It assumes the database has a schema
+ * operations that are needed on the preference data in the database. It assumes the database has a schema
  * like:
  * </p>
  *

Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/text/LuceneIndexFileNameFilter.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/text/LuceneIndexFileNameFilter.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/text/LuceneIndexFileNameFilter.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/text/LuceneIndexFileNameFilter.java Wed Jun 12 20:44:19 2013
@@ -26,10 +26,9 @@ import java.util.regex.Pattern;
  * A wrapper class to convert an IndexFileNameFilter which implements
  * java.io.FilenameFilter to an org.apache.hadoop.fs.PathFilter.
  */
-class LuceneIndexFileNameFilter implements PathFilter {
+final class LuceneIndexFileNameFilter implements PathFilter {
 
-  private static final LuceneIndexFileNameFilter singleton =
-          new LuceneIndexFileNameFilter();
+  private static final LuceneIndexFileNameFilter LUCENE_INDEX_FILE_NAME_FILTER = new LuceneIndexFileNameFilter();
 
   /**
    * Get a static instance.
@@ -37,18 +36,15 @@ class LuceneIndexFileNameFilter implemen
    * @return the static instance
    */
   public static LuceneIndexFileNameFilter getFilter() {
-    return singleton;
+    return LUCENE_INDEX_FILE_NAME_FILTER;
   }
 
-  private LuceneIndexFileNameFilter() {
-  }
+  private LuceneIndexFileNameFilter() {}
 
-  //TODO: Lucene defines this in IndexFileNames, but it is package private, so make sure it doesn't change w/ new releases.
+  //TODO: Lucene defines this in IndexFileNames, but it is package private,
+  // so make sure it doesn't change w/ new releases.
   private static final Pattern CODEC_FILE_PATTERN = Pattern.compile("_[a-z0-9]+(_.*)?\\..*");
 
-  /* (non-Javadoc)
-  * @see org.apache.hadoop.fs.PathFilter#accept(org.apache.hadoop.fs.Path)
-  */
   public boolean accept(Path path) {
     String name = path.getName();
     if (CODEC_FILE_PATTERN.matcher(name).matches() || name.startsWith(IndexFileNames.SEGMENTS)) {

Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/text/LuceneSegmentInputFormat.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/text/LuceneSegmentInputFormat.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/text/LuceneSegmentInputFormat.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/text/LuceneSegmentInputFormat.java Wed Jun 12 20:44:19 2013
@@ -16,6 +16,7 @@ package org.apache.mahout.text;
  * limitations under the License.
  */
 
+import com.google.common.collect.Lists;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
@@ -48,18 +49,21 @@ public class LuceneSegmentInputFormat ex
 
     LuceneStorageConfiguration lucene2SeqConfiguration = new LuceneStorageConfiguration(configuration);
 
-    List<LuceneSegmentInputSplit> inputSplits = new ArrayList<LuceneSegmentInputSplit>();
+    List<LuceneSegmentInputSplit> inputSplits = Lists.newArrayList();
 
     List<Path> indexPaths = lucene2SeqConfiguration.getIndexPaths();
     for (Path indexPath : indexPaths) {
-      ReadOnlyFileSystemDirectory directory = new ReadOnlyFileSystemDirectory(FileSystem.get(configuration), indexPath, false, configuration);
+      ReadOnlyFileSystemDirectory directory = new ReadOnlyFileSystemDirectory(FileSystem.get(configuration), indexPath,
+                                                                              false, configuration);
       SegmentInfos segmentInfos = new SegmentInfos();
       segmentInfos.read(directory);
 
       for (SegmentInfoPerCommit segmentInfo : segmentInfos) {
-        LuceneSegmentInputSplit inputSplit = new LuceneSegmentInputSplit(indexPath, segmentInfo.info.name, segmentInfo.sizeInBytes());
+        LuceneSegmentInputSplit inputSplit = new LuceneSegmentInputSplit(indexPath, segmentInfo.info.name,
+                                                                         segmentInfo.sizeInBytes());
         inputSplits.add(inputSplit);
-        LOG.info("Created {} byte input split for index '{}' segment {}", segmentInfo.sizeInBytes(), indexPath.toUri(), segmentInfo.info.name);
+        LOG.info("Created {} byte input split for index '{}' segment {}", segmentInfo.sizeInBytes(), indexPath.toUri(),
+                 segmentInfo.info.name);
       }
     }
 
@@ -67,7 +71,8 @@ public class LuceneSegmentInputFormat ex
   }
 
   @Override
-  public RecordReader<Text, NullWritable> createRecordReader(InputSplit inputSplit, TaskAttemptContext context) throws IOException, InterruptedException {
+  public RecordReader<Text, NullWritable> createRecordReader(InputSplit inputSplit, TaskAttemptContext context)
+    throws IOException, InterruptedException {
     LuceneSegmentRecordReader luceneSegmentRecordReader = new LuceneSegmentRecordReader();
     luceneSegmentRecordReader.initialize(inputSplit, context);
     return luceneSegmentRecordReader;

Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/text/LuceneSegmentInputSplit.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/text/LuceneSegmentInputSplit.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/text/LuceneSegmentInputSplit.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/text/LuceneSegmentInputSplit.java Wed Jun 12 20:44:19 2013
@@ -72,7 +72,8 @@ public class LuceneSegmentInputSplit ext
    * @throws IOException if an error occurs when accessing the directory
    */
   public SegmentInfoPerCommit getSegment(Configuration configuration) throws IOException {
-    ReadOnlyFileSystemDirectory directory = new ReadOnlyFileSystemDirectory(FileSystem.get(configuration), indexPath, false, configuration);
+    ReadOnlyFileSystemDirectory directory = new ReadOnlyFileSystemDirectory(FileSystem.get(configuration), indexPath,
+                                                                            false, configuration);
 
     SegmentInfos segmentInfos = new SegmentInfos();
     segmentInfos.read(directory);
@@ -83,6 +84,7 @@ public class LuceneSegmentInputSplit ext
       }
     }
 
-    throw new IllegalArgumentException("No such segment: '" + segmentInfoName + "' in directory " + directory.toString());
+    throw new IllegalArgumentException("No such segment: '" + segmentInfoName
+        + "' in directory " + directory.toString());
   }
-}
\ No newline at end of file
+}

Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/text/LuceneSegmentRecordReader.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/text/LuceneSegmentRecordReader.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/text/LuceneSegmentRecordReader.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/text/LuceneSegmentRecordReader.java Wed Jun 12 20:44:19 2013
@@ -60,7 +60,8 @@ public class LuceneSegmentRecordReader e
     Weight weight = lucene2SeqConfiguration.getQuery().createWeight(searcher);
     scorer = weight.scorer(segmentReader.getContext(), false, false, null);
     if (scorer == null) {
-      throw new IllegalArgumentException("Could not create query scorer for query: " + lucene2SeqConfiguration.getQuery());
+      throw new IllegalArgumentException("Could not create query scorer for query: "
+          + lucene2SeqConfiguration.getQuery());
     }
   }
 
@@ -84,7 +85,8 @@ public class LuceneSegmentRecordReader e
 
   @Override
   public float getProgress() throws IOException, InterruptedException {
-    return scorer.cost() == 0 ? 0 : (float) nextDocId / scorer.cost();//this is a rough estimate, due to the possible inaccuracies of cost
+    //this is a rough estimate, due to the possible inaccuracies of cost
+    return scorer.cost() == 0 ? 0 : (float) nextDocId / scorer.cost();
   }
 
   @Override

Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/text/LuceneSeqFileHelper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/text/LuceneSeqFileHelper.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/text/LuceneSeqFileHelper.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/text/LuceneSeqFileHelper.java Wed Jun 12 20:44:19 2013
@@ -28,9 +28,12 @@ import static org.apache.commons.lang.St
  *
  **/
 class LuceneSeqFileHelper {
+
   public static final String SEPARATOR_FIELDS = " ";
   public static final int USE_TERM_INFOS = 1;
 
+  private LuceneSeqFileHelper() {}
+
   public static void populateValues(Document document, Text theValue, List<String> fields) {
 
     StringBuilder valueBuilder = new StringBuilder();

Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/text/LuceneStorageConfiguration.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/text/LuceneStorageConfiguration.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/text/LuceneStorageConfiguration.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/text/LuceneStorageConfiguration.java Wed Jun 12 20:44:19 2013
@@ -17,6 +17,7 @@ package org.apache.mahout.text;
  */
 
 import com.google.common.base.Preconditions;
+import com.google.common.collect.Lists;
 import com.google.common.collect.Sets;
 import org.apache.commons.lang.StringUtils;
 import org.apache.hadoop.conf.Configuration;
@@ -77,7 +78,8 @@ public class LuceneStorageConfiguration 
    * @param idField                 field used for the key of the sequence file
    * @param fields                  field(s) used for the value of the sequence file
    */
-  public LuceneStorageConfiguration(Configuration configuration, List<Path> indexPaths, Path sequenceFilesOutputPath, String idField, List<String> fields) {
+  public LuceneStorageConfiguration(Configuration configuration, List<Path> indexPaths, Path sequenceFilesOutputPath,
+                                    String idField, List<String> fields) {
     Preconditions.checkArgument(configuration != null, "Parameter 'configuration' cannot be null");
     Preconditions.checkArgument(indexPaths != null, "Parameter 'indexPaths' cannot be null");
     Preconditions.checkArgument(indexPaths != null && !indexPaths.isEmpty(), "Parameter 'indexPaths' cannot be empty");
@@ -144,7 +146,8 @@ public class LuceneStorageConfiguration 
    * @return iterator
    */
   public Iterator<Pair<Text, Text>> getSequenceFileIterator() {
-    return new SequenceFileDirIterable<Text, Text>(sequenceFilesOutputPath, PathType.LIST, PathFilters.logsCRCFilter(), configuration).iterator();
+    return new SequenceFileDirIterable<Text, Text>(sequenceFilesOutputPath, PathType.LIST, PathFilters.logsCRCFilter(),
+                                                   configuration).iterator();
   }
 
   public Configuration getConfiguration() {
@@ -202,16 +205,16 @@ public class LuceneStorageConfiguration 
   @Override
   public void readFields(DataInput in) throws IOException {
     try {
-      this.sequenceFilesOutputPath = new Path(in.readUTF());
-      this.indexPaths = new ArrayList<Path>();
+      sequenceFilesOutputPath = new Path(in.readUTF());
+      indexPaths = Lists.newArrayList();
       String[] indexPaths = in.readUTF().split(SEPARATOR_PATHS);
       for (String indexPath : indexPaths) {
         this.indexPaths.add(new Path(indexPath));
       }
-      this.idField = in.readUTF();
-      this.fields = Arrays.asList(in.readUTF().split(SEPARATOR_FIELDS));
-      this.query = new QueryParser(LUCENE_43, "query", new StandardAnalyzer(LUCENE_43)).parse(in.readUTF());
-      this.maxHits = in.readInt();
+      idField = in.readUTF();
+      fields = Arrays.asList(in.readUTF().split(SEPARATOR_FIELDS));
+      query = new QueryParser(LUCENE_43, "query", new StandardAnalyzer(LUCENE_43)).parse(in.readUTF());
+      maxHits = in.readInt();
     } catch (ParseException e) {
       throw new RuntimeException("Could not deserialize " + this.getClass().getName(), e);
     }
@@ -219,18 +222,35 @@ public class LuceneStorageConfiguration 
 
   @Override
   public boolean equals(Object o) {
-    if (this == o) return true;
-    if (o == null || getClass() != o.getClass()) return false;
+    if (this == o) {
+      return true;
+    }
+    if (o == null || getClass() != o.getClass()) {
+      return false;
+    }
 
     LuceneStorageConfiguration that = (LuceneStorageConfiguration) o;
 
-    if (maxHits != that.maxHits) return false;
-    if (fields != null ? !fields.equals(that.fields) : that.fields != null) return false;
-    if (idField != null ? !idField.equals(that.idField) : that.idField != null) return false;
-    if (indexPaths != null ? !indexPaths.equals(that.indexPaths) : that.indexPaths != null) return false;
-    if (query != null ? !query.equals(that.query) : that.query != null) return false;
-    if (sequenceFilesOutputPath != null ? !sequenceFilesOutputPath.equals(that.sequenceFilesOutputPath) : that.sequenceFilesOutputPath != null)
+    if (maxHits != that.maxHits) {
+      return false;
+    }
+    if (fields != null ? !fields.equals(that.fields) : that.fields != null) {
+      return false;
+    }
+    if (idField != null ? !idField.equals(that.idField) : that.idField != null) {
       return false;
+    }
+    if (indexPaths != null ? !indexPaths.equals(that.indexPaths) : that.indexPaths != null) {
+      return false;
+    }
+    if (query != null ? !query.equals(that.query) : that.query != null) {
+      return false;
+    }
+    if (sequenceFilesOutputPath != null
+        ? !sequenceFilesOutputPath.equals(that.sequenceFilesOutputPath)
+        : that.sequenceFilesOutputPath != null) {
+      return false;
+    }
 
     return true;
   }

Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/text/ReadOnlyFileSystemDirectory.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/text/ReadOnlyFileSystemDirectory.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/text/ReadOnlyFileSystemDirectory.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/text/ReadOnlyFileSystemDirectory.java Wed Jun 12 20:44:19 2013
@@ -31,6 +31,8 @@ import org.apache.lucene.store.IOContext
 import org.apache.lucene.store.IndexInput;
 import org.apache.lucene.store.IndexOutput;
 import org.apache.lucene.store.Lock;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 import java.io.IOException;
 import java.util.Collection;
@@ -49,15 +51,18 @@ public class ReadOnlyFileSystemDirectory
   private final Path directory;
   private final int ioFileBufferSize;
 
-  /**
-   * Constructor
-   *
-   * @param fs
-   * @param directory
-   * @param create
-   * @param conf
-   * @throws IOException
-   */
+  private static final Logger log = LoggerFactory.getLogger(ReadOnlyFileSystemDirectory.class);
+
+      /**
+       * Constructor
+       *
+       * @param fs
+       * @param directory
+       * @param create
+       * @param conf
+       * @throws IOException
+       */
+
   public ReadOnlyFileSystemDirectory(FileSystem fs, Path directory, boolean create,
                                      Configuration conf) throws IOException {
 
@@ -76,7 +81,7 @@ public class ReadOnlyFileSystemDirectory
         isDir = status.isDir();
       }
     } catch (IOException e) {
-      // file does not exist, isDir already set to false
+      log.error(e.getMessage(), e);
     }
     if (!isDir) {
       throw new IOException(directory + " is not a directory");
@@ -96,7 +101,7 @@ public class ReadOnlyFileSystemDirectory
         isDir = status.isDir();
       }
     } catch (IOException e) {
-      // file does not exist, isDir already set to false
+      log.error(e.getMessage(), e);
     }
     if (!isDir) {
       throw new IOException(directory + " is not a directory");
@@ -113,9 +118,6 @@ public class ReadOnlyFileSystemDirectory
     }
   }
 
-  /* (non-Javadoc)
-  * @see org.apache.lucene.store.Directory#list()
-  */
   public String[] list() throws IOException {
     FileStatus[] fileStatus =
             fs.listStatus(directory, LuceneIndexFileNameFilter.getFilter());
@@ -131,50 +133,23 @@ public class ReadOnlyFileSystemDirectory
     return list();
   }
 
-  /* (non-Javadoc)
-  * @see org.apache.lucene.store.Directory#fileExists(java.lang.String)
-  */
+  @Override
   public boolean fileExists(String name) throws IOException {
     return fs.exists(new Path(directory, name));
   }
 
-  /* (non-Javadoc)
-  * @see org.apache.lucene.store.Directory#fileModified(java.lang.String)
-  */
-  public long fileModified(String name) {
-    throw new UnsupportedOperationException();
-  }
-
-  /* (non-Javadoc)
-  * @see org.apache.lucene.store.Directory#touchFile(java.lang.String)
-  */
-  public void touchFile(String name) {
-    throw new UnsupportedOperationException();
-  }
-
-  /* (non-Javadoc)
-  * @see org.apache.lucene.store.Directory#fileLength(java.lang.String)
-  */
+  @Override
   public long fileLength(String name) throws IOException {
     return fs.getFileStatus(new Path(directory, name)).getLen();
   }
 
-  /* (non-Javadoc)
-  * @see org.apache.lucene.store.Directory#deleteFile(java.lang.String)
-  */
+  @Override
   public void deleteFile(String name) throws IOException {
     if (!fs.delete(new Path(directory, name), true)) {
       throw new IOException("Cannot delete index file " + name);
     }
   }
 
-  /* (non-Javadoc)
-  * @see org.apache.lucene.store.Directory#renameFile(java.lang.String, java.lang.String)
-  */
-  public void renameFile(String from, String to) throws IOException {
-    fs.rename(new Path(directory, from), new Path(directory, to));
-  }
-
   @Override
   public IndexOutput createOutput(String name, IOContext context) throws IOException {
     //TODO: What should we be doing with the IOContext here, if anything?
@@ -197,10 +172,7 @@ public class ReadOnlyFileSystemDirectory
     return new FileSystemIndexInput(new Path(directory, name), ioFileBufferSize);
   }
 
-
-  /* (non-Javadoc)
-  * @see org.apache.lucene.store.Directory#makeLock(java.lang.String)
-  */
+  @Override
   public Lock makeLock(final String name) {
     return new Lock() {
       public boolean obtain() {
@@ -220,21 +192,17 @@ public class ReadOnlyFileSystemDirectory
     };
   }
 
-  /* (non-Javadoc)
-  * @see org.apache.lucene.store.Directory#close()
-  */
+  @Override
   public void close() throws IOException {
     // do not close the file system
   }
 
-  /* (non-Javadoc)
-  * @see java.lang.Object#toString()
-  */
+  @Override
   public String toString() {
     return this.getClass().getName() + "@" + directory;
   }
 
-  private class FileSystemIndexInput extends BufferedIndexInput {
+  private class FileSystemIndexInput extends BufferedIndexInput implements Cloneable {
 
     // shared by clones
     private class Descriptor {
@@ -253,7 +221,7 @@ public class ReadOnlyFileSystemDirectory
     private boolean isClone;
 
     public FileSystemIndexInput(Path path, int ioFileBufferSize)
-            throws IOException {
+      throws IOException {
       super("FSII_" + path.getName(), ioFileBufferSize);
       filePath = path;
       descriptor = new Descriptor(path, ioFileBufferSize);
@@ -261,8 +229,9 @@ public class ReadOnlyFileSystemDirectory
       isOpen = true;
     }
 
+    @Override
     protected void readInternal(byte[] b, int offset, int len)
-            throws IOException {
+      throws IOException {
       long position = getFilePointer();
       if (position != descriptor.position) {
         descriptor.in.seek(position);
@@ -279,6 +248,7 @@ public class ReadOnlyFileSystemDirectory
       } while (total < len);
     }
 
+    @Override
     public void close() throws IOException {
       if (!isClone) {
         if (isOpen) {
@@ -290,20 +260,24 @@ public class ReadOnlyFileSystemDirectory
       }
     }
 
+    @Override
     protected void seekInternal(long position) {
       // handled in readInternal()
     }
 
+    @Override
     public long length() {
       return length;
     }
 
+    @Override
     protected void finalize() throws IOException {
       if (!isClone && isOpen) {
         close(); // close the file
       }
     }
 
+    @Override
     public BufferedIndexInput clone() {
       FileSystemIndexInput clone = (FileSystemIndexInput) super.clone();
       clone.isClone = true;
@@ -318,17 +292,19 @@ public class ReadOnlyFileSystemDirectory
     private boolean isOpen;
 
     public FileSystemIndexOutput(Path path, int ioFileBufferSize)
-            throws IOException {
+      throws IOException {
       filePath = path;
       // overwrite is true by default
       out = fs.create(path, true, ioFileBufferSize);
       isOpen = true;
     }
 
+    @Override
     public void flushBuffer(byte[] b, int offset, int size) throws IOException {
       out.write(b, offset, size);
     }
 
+    @Override
     public void close() throws IOException {
       if (isOpen) {
         super.close();
@@ -339,14 +315,17 @@ public class ReadOnlyFileSystemDirectory
       }
     }
 
+    @Override
     public void seek(long pos) throws IOException {
       throw new UnsupportedOperationException();
     }
 
+    @Override
     public long length() throws IOException {
       return out.getPos();
     }
 
+    @Override
     protected void finalize() throws IOException {
       if (isOpen) {
         close(); // close the file

Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromLuceneStorage.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromLuceneStorage.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromLuceneStorage.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromLuceneStorage.java Wed Jun 12 20:44:19 2013
@@ -65,9 +65,11 @@ public class SequenceFilesFromLuceneStor
       Configuration configuration = lucene2seqConf.getConfiguration();
       FileSystem fileSystem = FileSystem.get(configuration);
       Path sequenceFilePath = new Path(lucene2seqConf.getSequenceFilesOutputPath(), indexPath.getName());
-      final SequenceFile.Writer sequenceFileWriter = new SequenceFile.Writer(fileSystem, configuration, sequenceFilePath, Text.class, Text.class);
+      final SequenceFile.Writer sequenceFileWriter = new SequenceFile.Writer(fileSystem, configuration,
+          sequenceFilePath, Text.class, Text.class);
 
-      SeqFileWriterCollector writerCollector = new SeqFileWriterCollector(lucene2seqConf, sequenceFileWriter, processedDocs);
+      SeqFileWriterCollector writerCollector = new SeqFileWriterCollector(lucene2seqConf, sequenceFileWriter,
+          processedDocs);
       searcher.search(lucene2seqConf.getQuery(), writerCollector);
       log.info("Wrote " + writerCollector.processedDocs + " documents in " + sequenceFilePath.toUri());
       processedDocs = writerCollector.processedDocs;
@@ -84,7 +86,8 @@ public class SequenceFilesFromLuceneStor
     public int processedDocs;
     AtomicReaderContext arc;
 
-    SeqFileWriterCollector(LuceneStorageConfiguration lucene2seqConf, SequenceFile.Writer sequenceFileWriter, int processedDocs) {
+    SeqFileWriterCollector(LuceneStorageConfiguration lucene2seqConf, SequenceFile.Writer sequenceFileWriter,
+                           int processedDocs) {
       this.lucene2seqConf = lucene2seqConf;
       this.sequenceFileWriter = sequenceFileWriter;
       this.processedDocs = processedDocs;

Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromLuceneStorageDriver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromLuceneStorageDriver.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromLuceneStorageDriver.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromLuceneStorageDriver.java Wed Jun 12 20:44:19 2013
@@ -17,6 +17,7 @@ package org.apache.mahout.text;
  */
 
 
+import com.google.common.collect.Lists;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.util.ToolRunner;
@@ -75,12 +76,9 @@ public class SequenceFilesFromLuceneStor
     }
 
     Configuration configuration = getConf();
-    if (configuration == null) {
-      configuration = new Configuration();
-    }
 
     String[] paths = getInputPath().toString().split(",");
-    List<Path> indexPaths = new ArrayList<Path>();
+    List<Path> indexPaths = Lists.newArrayList();
     for (String path : paths) {
       indexPaths.add(new Path(path));
     }
@@ -100,7 +98,8 @@ public class SequenceFilesFromLuceneStor
     if (hasOption(OPTION_QUERY)) {
       try {
         String queryString = COMPILE.matcher(getOption(OPTION_QUERY)).replaceAll("");
-        QueryParser queryParser = new QueryParser(Version.LUCENE_43, queryString, new StandardAnalyzer(Version.LUCENE_43));
+        QueryParser queryParser = new QueryParser(Version.LUCENE_43, queryString,
+            new StandardAnalyzer(Version.LUCENE_43));
         query = queryParser.parse(queryString);
       } catch (ParseException e) {
         throw new IllegalArgumentException(e.getMessage(), e);
@@ -115,7 +114,8 @@ public class SequenceFilesFromLuceneStor
     }
     lucene2SeqConf.setMaxHits(maxHits);
 
-    if (hasOption(DefaultOptionCreator.METHOD_OPTION) && getOption(DefaultOptionCreator.METHOD_OPTION).equals("sequential")) {
+    if (hasOption(DefaultOptionCreator.METHOD_OPTION)
+        && getOption(DefaultOptionCreator.METHOD_OPTION).equals("sequential")) {
       new SequenceFilesFromLuceneStorage().run(lucene2SeqConf);
     } else {
       new SequenceFilesFromLuceneStorageMRJob().run(lucene2SeqConf);

Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromLuceneStorageMRJob.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromLuceneStorageMRJob.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromLuceneStorageMRJob.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromLuceneStorageMRJob.java Wed Jun 12 20:44:19 2013
@@ -27,8 +27,8 @@ import org.apache.hadoop.mapreduce.lib.o
 import java.io.IOException;
 
 /**
- * Generates a sequence file from a Lucene index via MapReduce. Uses a specified id field as the key and a content field as the value.
- * Configure this class with a {@link LuceneStorageConfiguration} bean.
+ * Generates a sequence file from a Lucene index via MapReduce. Uses a specified id field as the key and a content field
+ * as the value. Configure this class with a {@link LuceneStorageConfiguration} bean.
  */
 public class SequenceFilesFromLuceneStorageMRJob {
 
@@ -36,7 +36,8 @@ public class SequenceFilesFromLuceneStor
     try {
       Configuration configuration = lucene2seqConf.serialize();
 
-      Job job = new Job(configuration, "LuceneIndexToSequenceFiles: " + lucene2seqConf.getIndexPaths() + " -> M/R -> " + lucene2seqConf.getSequenceFilesOutputPath());
+      Job job = new Job(configuration, "LuceneIndexToSequenceFiles: " + lucene2seqConf.getIndexPaths() + " -> M/R -> "
+          + lucene2seqConf.getSequenceFilesOutputPath());
 
       job.setMapOutputKeyClass(Text.class);
       job.setMapOutputValueClass(Text.class);

Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromLuceneStorageMapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromLuceneStorageMapper.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromLuceneStorageMapper.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromLuceneStorageMapper.java Wed Jun 12 20:44:19 2013
@@ -21,7 +21,7 @@ import static org.apache.commons.lang.St
  */
 public class SequenceFilesFromLuceneStorageMapper extends Mapper<Text, NullWritable, Text, Text> {
 
-  public enum DataStatus {EMPTY_KEY, EMPTY_VALUE, EMPTY_BOTH}
+  public enum DataStatus { EMPTY_KEY, EMPTY_VALUE, EMPTY_BOTH }
 
   private LuceneStorageConfiguration l2sConf;
   private SegmentReader segmentReader;
@@ -50,9 +50,9 @@ public class SequenceFilesFromLuceneStor
       context.getCounter(DataStatus.EMPTY_BOTH).increment(1);
       return;
     }
-    if (isBlank(theKey.toString())){
+    if (isBlank(theKey.toString())) {
       context.getCounter(DataStatus.EMPTY_KEY).increment(1);
-    } else if (isBlank(theValue.toString())){
+    } else if (isBlank(theValue.toString())) {
       context.getCounter(DataStatus.EMPTY_VALUE).increment(1);
     }
     context.write(theKey, theValue);
@@ -62,4 +62,4 @@ public class SequenceFilesFromLuceneStor
   protected void cleanup(Context context) throws IOException, InterruptedException {
     segmentReader.close();
   }
-}
\ No newline at end of file
+}

Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/utils/ConcatenateVectorsJob.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/utils/ConcatenateVectorsJob.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/utils/ConcatenateVectorsJob.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/utils/ConcatenateVectorsJob.java Wed Jun 12 20:44:19 2013
@@ -54,8 +54,6 @@ public class ConcatenateVectorsJob exten
   
   static final String MATRIXA_DIMS = "mahout.concatenatevectors.matrixA_dims";
   static final String MATRIXB_DIMS = "mahout.concatenatevectors.matrixB_dims";
-
-  private static final Logger LOG = LoggerFactory.getLogger(ConcatenateVectorsJob.class);
   
   private ConcatenateVectorsJob() {}
   

Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/utils/ConcatenateVectorsReducer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/utils/ConcatenateVectorsReducer.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/utils/ConcatenateVectorsReducer.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/utils/ConcatenateVectorsReducer.java Wed Jun 12 20:44:19 2013
@@ -77,8 +77,9 @@ public class ConcatenateVectorsReducer e
 
     if (vOut == null) {
       vOut = new SequentialAccessSparseVector(dimsA + dimsB);
-      if (isNamed) 
+      if (isNamed) {
         vOut = new NamedVector(vOut, name);
+      }
     }
 
     if (vA != null) {
@@ -92,7 +93,8 @@ public class ConcatenateVectorsReducer e
   }
   
   private void appendVector(Vector vOut, Vector vIn, int offset) {
-    for (Vector.Element element : vIn.nonZeroes())
+    for (Vector.Element element : vIn.nonZeroes()) {
       vOut.set(element.index() + offset, element.get());
+    }
   }
 }

Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/utils/SplitInput.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/utils/SplitInput.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/utils/SplitInput.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/utils/SplitInput.java Wed Jun 12 20:44:19 2013
@@ -277,16 +277,14 @@ public class SplitInput extends Abstract
    */
   public void splitDirectory(Path inputDir) throws IOException, ClassNotFoundException, InterruptedException {
     Configuration conf = getConf();
-    if (conf == null) {
-      conf = new Configuration();
-    }
     splitDirectory(conf, inputDir);
   }
 
   /*
    * See also splitDirectory(Path inputDir)
    * */
-  public void splitDirectory(Configuration conf, Path inputDir) throws IOException, ClassNotFoundException, InterruptedException {
+  public void splitDirectory(Configuration conf, Path inputDir)
+    throws IOException, ClassNotFoundException, InterruptedException {
     FileSystem fs = inputDir.getFileSystem(conf);
     if (fs.getFileStatus(inputDir) == null) {
       throw new IOException(inputDir + " does not exist");
@@ -315,9 +313,6 @@ public class SplitInput extends Abstract
    */
   public void splitFile(Path inputFile) throws IOException {
     Configuration conf = getConf();
-    if (conf == null) {
-      conf = new Configuration();
-    }
     FileSystem fs = inputFile.getFileSystem(conf);
     if (fs.getFileStatus(inputFile) == null) {
       throw new IOException(inputFile + " does not exist");
@@ -655,9 +650,6 @@ public class SplitInput extends Abstract
 
     if (!useMapRed) {
       Configuration conf = getConf();
-      if (conf == null) {
-        conf = new Configuration();
-      }
       FileSystem fs = trainingOutputDirectory.getFileSystem(conf);
       FileStatus trainingOutputDirStatus = fs.getFileStatus(trainingOutputDirectory);
       Preconditions.checkArgument(trainingOutputDirStatus != null && trainingOutputDirStatus.isDir(),

Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/utils/clustering/AbstractClusterWriter.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/utils/clustering/AbstractClusterWriter.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/utils/clustering/AbstractClusterWriter.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/utils/clustering/AbstractClusterWriter.java Wed Jun 12 20:44:19 2013
@@ -88,7 +88,7 @@ public abstract class AbstractClusterWri
       }
     });
 
-    Collection<Pair<String, Double>> topTerms = new LinkedList<Pair<String, Double>>();
+    Collection<Pair<String, Double>> topTerms = Lists.newLinkedList();
 
     for (int i = 0; i < vectorTerms.size() && i < numTerms; i++) {
       int index = vectorTerms.get(i).index;
@@ -128,7 +128,7 @@ public abstract class AbstractClusterWri
       }
     });
 
-    Collection<Pair<String, Double>> topTerms = new LinkedList<Pair<String, Double>>();
+    Collection<Pair<String, Double>> topTerms = Lists.newLinkedList();
 
     for (int i = 0; i < vectorTerms.size() && i < numTerms; i++) {
       int index = vectorTerms.get(i).index;

Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/utils/email/MailProcessor.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/utils/email/MailProcessor.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/utils/email/MailProcessor.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/utils/email/MailProcessor.java Wed Jun 12 20:44:19 2013
@@ -33,9 +33,8 @@ import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
 /**
- * Converts an mbox mail archive into a group of Hadoop Sequence Files with equal size. The archive may optionally be gzipped or zipped.
- * @see org.apache.mahout.text.SequenceFilesFromMailArchives
- *
+ * Converts an mbox mail archive into a group of Hadoop Sequence Files with equal size. The archive may optionally be
+ * gzipped or zipped. @see org.apache.mahout.text.SequenceFilesFromMailArchives
  */
 public class MailProcessor {
 

Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/utils/regex/AnalyzerTransformer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/utils/regex/AnalyzerTransformer.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/utils/regex/AnalyzerTransformer.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/utils/regex/AnalyzerTransformer.java Wed Jun 12 20:44:19 2013
@@ -24,6 +24,8 @@ import org.apache.lucene.analysis.standa
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.util.Version;
 import org.apache.mahout.common.lucene.TokenStreamIterator;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 import java.io.IOException;
 import java.io.StringReader;
@@ -33,6 +35,8 @@ public class AnalyzerTransformer impleme
   private Analyzer analyzer;
   private String fieldName = "text";
 
+  private static final Logger log = LoggerFactory.getLogger(AnalyzerTransformer.class);
+
   public AnalyzerTransformer() {
     this(new StandardAnalyzer(Version.LUCENE_43), "text");
   }
@@ -65,7 +69,7 @@ public class AnalyzerTransformer impleme
       try {
         Closeables.close(ts, true);
       } catch (IOException e) {
-        //nothing
+        log.error(e.getMessage(), e);
       }
     }
     return result.toString();

Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/arff/Driver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/arff/Driver.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/arff/Driver.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/arff/Driver.java Wed Jun 12 20:44:19 2013
@@ -171,7 +171,7 @@ public final class Driver {
     // Turn the map of labels into a list order by order of appearance
     List<Entry<String, Integer>> attributes = Lists.newArrayList();
     attributes.addAll(arffModel.getLabelBindings().entrySet());
-    Collections.sort(attributes, new Comparator<Map.Entry<String, Integer>>(){
+    Collections.sort(attributes, new Comparator<Map.Entry<String, Integer>>() {
       @Override
       public int compare(Entry<String, Integer> t, Entry<String, Integer> t1) {
         return t.getValue().compareTo(t1.getValue());
@@ -219,13 +219,14 @@ public final class Driver {
     Map<String, Map<String, Integer>> nominalMap = arffModel.getNominalMap();
     // how many nominal attributes
     writer.write(String.valueOf(nominalMap.size()) + "\n");
-    for (String attribute : nominalMap.keySet()) {
+
+    for (Entry<String, Map<String, Integer>> entry : nominalMap.entrySet()) {
       // the label of this attribute
-      writer.write(attribute + "\n");
-      Set<Entry<String, Integer>> attributeValues = nominalMap.get(attribute).entrySet();
+      writer.write(entry.getKey() + "\n");
+      Set<Entry<String, Integer>> attributeValues = entry.getValue().entrySet();
       // how many values does this attribute have
       writer.write(attributeValues.size() + "\n");
-      for (Map.Entry<String, Integer> value : nominalMap.get(attribute).entrySet()) {
+      for (Map.Entry<String, Integer> value : attributeValues) {
         // the value and the value index
         writer.write(String.format("%s%s%s\n", value.getKey(), delimiter, value.getValue().toString()));
       }

Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/lucene/ClusterLabels.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/lucene/ClusterLabels.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/lucene/ClusterLabels.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/lucene/ClusterLabels.java Wed Jun 12 20:44:19 2013
@@ -31,6 +31,7 @@ import java.util.Set;
 import java.util.TreeSet;
 
 import com.google.common.base.Charsets;
+import com.google.common.collect.Lists;
 import com.google.common.collect.Sets;
 import com.google.common.io.Closeables;
 import com.google.common.io.Files;
@@ -214,7 +215,7 @@ public class ClusterLabels {
 
     }
 
-    List<TermInfoClusterInOut> clusteredTermInfo = new LinkedList<TermInfoClusterInOut>();
+    List<TermInfoClusterInOut> clusteredTermInfo = Lists.newLinkedList();
 
     int clusterSize = wvws.size();
 

Modified: mahout/trunk/integration/src/test/java/org/apache/mahout/text/AbstractLuceneStorageTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/test/java/org/apache/mahout/text/AbstractLuceneStorageTest.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/integration/src/test/java/org/apache/mahout/text/AbstractLuceneStorageTest.java (original)
+++ mahout/trunk/integration/src/test/java/org/apache/mahout/text/AbstractLuceneStorageTest.java Wed Jun 12 20:44:19 2013
@@ -16,6 +16,7 @@
  */
 package org.apache.mahout.text;
 
+import com.google.common.collect.Lists;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.io.Text;
 import org.apache.lucene.analysis.standard.StandardAnalyzer;
@@ -43,8 +44,8 @@ public abstract class AbstractLuceneStor
 
   protected Path indexPath1;
   protected Path indexPath2;
-  protected List<SingleFieldDocument> docs = new ArrayList<SingleFieldDocument>();
-  protected List<SingleFieldDocument> misshapenDocs = new ArrayList<SingleFieldDocument>();
+  protected List<SingleFieldDocument> docs = Lists.newArrayList();
+  protected List<SingleFieldDocument> misshapenDocs = Lists.newArrayList();
 
   @Override
   public void setUp() throws Exception {

Modified: mahout/trunk/integration/src/test/java/org/apache/mahout/text/LuceneSegmentInputSplitTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/test/java/org/apache/mahout/text/LuceneSegmentInputSplitTest.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/integration/src/test/java/org/apache/mahout/text/LuceneSegmentInputSplitTest.java (original)
+++ mahout/trunk/integration/src/test/java/org/apache/mahout/text/LuceneSegmentInputSplitTest.java Wed Jun 12 20:44:19 2013
@@ -62,7 +62,7 @@ public class LuceneSegmentInputSplitTest
   }
 
   @Test(expected = IllegalArgumentException.class)
-  public void testGetSegment_nonExistingSegment() throws Exception {
+  public void testGetSegmentNonExistingSegment() throws Exception {
     SingleFieldDocument doc1 = new SingleFieldDocument("1", "This is simple document 1");
     SingleFieldDocument doc2 = new SingleFieldDocument("2", "This is simple document 2");
     SingleFieldDocument doc3 = new SingleFieldDocument("3", "This is simple document 3");

Modified: mahout/trunk/integration/src/test/java/org/apache/mahout/text/LuceneStorageConfigurationTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/test/java/org/apache/mahout/text/LuceneStorageConfigurationTest.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/integration/src/test/java/org/apache/mahout/text/LuceneStorageConfigurationTest.java (original)
+++ mahout/trunk/integration/src/test/java/org/apache/mahout/text/LuceneStorageConfigurationTest.java Wed Jun 12 20:44:19 2013
@@ -42,7 +42,7 @@ public class LuceneStorageConfigurationT
   }
   
   @Test(expected = IllegalArgumentException.class)
-  public void testSerialization_notSerialized() throws IOException {
+  public void testSerializationNotSerialized() throws IOException {
     new LuceneStorageConfiguration(new Configuration());
   }
 }

Modified: mahout/trunk/integration/src/test/java/org/apache/mahout/text/SequenceFilesFromLuceneStorageDriverTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/test/java/org/apache/mahout/text/SequenceFilesFromLuceneStorageDriverTest.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/integration/src/test/java/org/apache/mahout/text/SequenceFilesFromLuceneStorageDriverTest.java (original)
+++ mahout/trunk/integration/src/test/java/org/apache/mahout/text/SequenceFilesFromLuceneStorageDriverTest.java Wed Jun 12 20:44:19 2013
@@ -121,7 +121,7 @@ public class SequenceFilesFromLuceneStor
   }
 
   @Test
-  public void testRun_optionalArguments() throws Exception {
+  public void testRunOptionalArguments() throws Exception {
     String[] args = {
       "-i", getIndexPath1AsFile().toString(),
       "-o", seqFilesOutputPath.toString(),
@@ -144,7 +144,7 @@ public class SequenceFilesFromLuceneStor
   }
 
   @Test
-  public void testRun_invalidQuery() throws Exception {
+  public void testRunInvalidQuery() throws Exception {
     String[] args = {
       "-i", getIndexPath1AsFile().toString(),
       "-o", seqFilesOutputPath.toString(),

Modified: mahout/trunk/integration/src/test/java/org/apache/mahout/text/SequenceFilesFromLuceneStorageTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/test/java/org/apache/mahout/text/SequenceFilesFromLuceneStorageTest.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/integration/src/test/java/org/apache/mahout/text/SequenceFilesFromLuceneStorageTest.java (original)
+++ mahout/trunk/integration/src/test/java/org/apache/mahout/text/SequenceFilesFromLuceneStorageTest.java Wed Jun 12 20:44:19 2013
@@ -100,7 +100,7 @@ public class SequenceFilesFromLuceneStor
 
   @SuppressWarnings("unchecked")
   @Test
-  public void testRun_skipUnstoredFields() throws IOException {
+  public void testRunSkipUnstoredFields() throws IOException {
     commitDocuments(getDirectory(getIndexPath1AsFile()), new UnstoredFieldsDocument("5", "This is test document 5"));
 
     LuceneStorageConfiguration lucene2SeqConf = new LuceneStorageConfiguration(configuration,
@@ -119,7 +119,7 @@ public class SequenceFilesFromLuceneStor
 
   @SuppressWarnings("unchecked")
   @Test
-  public void testRun_maxHits() throws IOException {
+  public void testRunMaxHits() throws IOException {
     commitDocuments(getDirectory(getIndexPath1AsFile()), docs.subList(0, 500));
     commitDocuments(getDirectory(getIndexPath1AsFile()), docs.subList(1000, 1500));
 
@@ -141,7 +141,7 @@ public class SequenceFilesFromLuceneStor
 
   @SuppressWarnings("unchecked")
   @Test
-  public void testRun_query() throws IOException {
+  public void testRunQuery() throws IOException {
     commitDocuments(getDirectory(getIndexPath1AsFile()), docs);
     LuceneStorageConfiguration lucene2SeqConf = new LuceneStorageConfiguration(configuration,
       asList(getIndexPath1()),
@@ -162,7 +162,7 @@ public class SequenceFilesFromLuceneStor
   }
 
   @Test
-  public void testRun_multipleFields() throws IOException {
+  public void testRunMultipleFields() throws IOException {
     LuceneStorageConfiguration lucene2SeqConf = new LuceneStorageConfiguration(configuration,
       asList(getIndexPath1()),
       seqFilesOutputPath,
@@ -184,7 +184,7 @@ public class SequenceFilesFromLuceneStor
   }
 
   @Test
-  public void testRun_numericField() throws IOException {
+  public void testRunNumericField() throws IOException {
     LuceneStorageConfiguration lucene2SeqConf = new LuceneStorageConfiguration(configuration,
       asList(getIndexPath1()),
       seqFilesOutputPath,