You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by sr...@apache.org on 2009/08/24 22:16:40 UTC

svn commit: r807361 [2/2] - in /lucene/mahout/trunk: core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/ core/src/main/java/org/apache/mahout/classifier/bayes/ core/src/main/java/org/apache/mahout/classifier/cbayes/ core/src/main/java/org/a...

Modified: lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/CDMutationTest.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/CDMutationTest.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/CDMutationTest.java (original)
+++ lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/CDMutationTest.java Mon Aug 24 20:16:37 2009
@@ -55,25 +55,22 @@
       CDRule mutated = mutation.mutate(new CDRule(rule), rng);
 
       // check the ranges
-      double min, max;
-      double value, newval;
-      int nbcats;
 
       for (int condInd = 0; condInd < mutated.getNbConditions(); condInd++) {
         int attrInd = CDRule.attributeIndex(condInd);
-        value = rule.getV(condInd);
-        newval = mutated.getV(condInd);
+        double value = rule.getV(condInd);
+        double newval = mutated.getV(condInd);
         modified = modified || (value != newval);
 
         if (dataset.isNumerical(attrInd)) {
-          min = dataset.getMin(attrInd);
-          max = dataset.getMax(attrInd);
+          double min = dataset.getMin(attrInd);
+          double max = dataset.getMax(attrInd);
 
           assertInRange(newval, min, max);
           assertTrue(Math.abs(newval - value) <= (max - min) * range);
 
         } else {
-          nbcats = dataset.getNbValues(attrInd);
+          int nbcats = dataset.getNbValues(attrInd);
 
           assertInRange(newval, 0, nbcats);
         }
@@ -84,7 +81,7 @@
     assertTrue(modified);
   }
 
-  private void assertInRange(double value, double min, double max) {
+  private static void assertInRange(double value, double min, double max) {
     TestCase.assertTrue("value < min", value >= min);
     TestCase.assertTrue("value > max", value <= max);
   }

Modified: lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/CDRuleTest.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/CDRuleTest.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/CDRuleTest.java (original)
+++ lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/CDRuleTest.java Mon Aug 24 20:16:37 2009
@@ -61,7 +61,7 @@
     }
   }
 
-  private void assertInRange(double value, double min, double max) {
+  private static void assertInRange(double value, double min, double max) {
     Assert.assertTrue("value < min", value >= min);
     Assert.assertTrue("value > max", value <= max);
   }
@@ -77,14 +77,14 @@
    * 
    */
   public void testWCondition() {
-    int n = 100; // repeat the test n times
 
     // the dataline has all its attributes set to 0d
     DataLine dl = EasyMock.createMock(DataLine.class);
-    EasyMock.expect(dl.getAttribut(EasyMock.anyInt())).andReturn(0d).atLeastOnce();
+    EasyMock.expect(dl.getAttribut(EasyMock.anyInt())).andReturn(0.0).atLeastOnce();
     EasyMock.replay(dl);
 
     // all the conditions are : attribut < 0
+    int n = 100; // repeat the test n times
     for (int nloop = 0; nloop < n; nloop++) {
       double thr = rng.nextDouble();
 
@@ -116,19 +116,19 @@
    * 
    */
   public void testOConditionNumerical() {
-    int n = 100; // repeat the test n times
 
     // the dataline has all its attributes set to 1d
     DataLine dl = EasyMock.createMock(DataLine.class);
-    EasyMock.expect(dl.getAttribut(EasyMock.anyInt())).andReturn(1d).atLeastOnce();
+    EasyMock.expect(dl.getAttribut(EasyMock.anyInt())).andReturn(1.0d).atLeastOnce();
     EasyMock.replay(dl);
 
+    int n = 100; // repeat the test n times
     for (int nloop = 0; nloop < n; nloop++) {
       mock.numericalDataset();
 
-      CDRule rule = new CDRule(0.);
+      CDRule rule = new CDRule(0.0);
       for (int condInd = 0; condInd < rule.getNbConditions(); condInd++) {
-        rule.setW(condInd, 1.); // all weights are 1 (active)
+        rule.setW(condInd, 1.0); // all weights are 1 (active)
         rule.setO(condInd, rng.nextBoolean());
         rule.setV(condInd, 0);
       }
@@ -152,21 +152,21 @@
    * 
    */
   public void testOConditionCategorical() {
-    int n = 100; // repeat the test n times
 
     // the dataline has all its attributes set to 1d
     DataLine dl = EasyMock.createMock(DataLine.class);
-    EasyMock.expect(dl.getAttribut(EasyMock.anyInt())).andReturn(1d).atLeastOnce();
+    EasyMock.expect(dl.getAttribut(EasyMock.anyInt())).andReturn(1.0d).atLeastOnce();
     EasyMock.replay(dl);
 
     Random rng = new MersenneTwisterRNG();
+    int n = 100; // repeat the test n times
     for (int nloop = 0; nloop < n; nloop++) {
       mock.categoricalDataset();
 
       // all weights are 1 (active)
-      CDRule rule = new CDRule(0.);
+      CDRule rule = new CDRule(0.0);
       for (int condInd = 0; condInd < rule.getNbConditions(); condInd++) {
-        rule.setW(condInd, 1.);
+        rule.setW(condInd, 1.0);
         rule.setO(condInd, rng.nextBoolean());
         rule.setV(condInd, rng.nextInt(2)); // two categories
       }

Modified: lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/hadoop/DatasetSplitTest.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/hadoop/DatasetSplitTest.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/hadoop/DatasetSplitTest.java (original)
+++ lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/hadoop/DatasetSplitTest.java Mon Aug 24 20:16:37 2009
@@ -42,39 +42,45 @@
 
     private long current;
 
-    private long size;
+    private final long size;
 
-    public MockReader(long size) {
+    MockReader(long size) {
       assert size > 0 : "size == 0";
 
       this.size = size;
     }
 
+    @Override
     public void close() throws IOException {
       // TODO Auto-generated method stub
 
     }
 
+    @Override
     public LongWritable createKey() {
       // TODO Auto-generated method stub
       return null;
     }
 
+    @Override
     public Text createValue() {
       // TODO Auto-generated method stub
       return null;
     }
 
+    @Override
     public long getPos() throws IOException {
       // TODO Auto-generated method stub
       return 0;
     }
 
+    @Override
     public float getProgress() throws IOException {
       // TODO Auto-generated method stub
       return 0;
     }
 
+    @Override
     public boolean next(LongWritable key, Text value) throws IOException {
       if (current == size) {
         return false;
@@ -89,13 +95,11 @@
     int n = 20;
 
     for (int nloop = 0; nloop < n; nloop++) {
-      long datasetSize = 100;
       MersenneTwisterRNG rng = new MersenneTwisterRNG();
       byte[] seed = rng.getSeed();
       double threshold = rng.nextDouble();
 
       JobConf conf = new JobConf();
-      RndLineRecordReader rndReader;
       Set<Long> dataset = new HashSet<Long>();
       LongWritable key = new LongWritable();
       Text value = new Text();
@@ -104,7 +108,8 @@
 
       // read the training set
       split.storeJobParameters(conf);
-      rndReader = new RndLineRecordReader(new MockReader(datasetSize), conf);
+      long datasetSize = 100;
+      RndLineRecordReader rndReader = new RndLineRecordReader(new MockReader(datasetSize), conf);
       while (rndReader.next(key, value)) {
         assertTrue("duplicate line index", dataset.add(key.get()));
       }

Modified: lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/tool/CDInfosToolTest.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/tool/CDInfosToolTest.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/tool/CDInfosToolTest.java (original)
+++ lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/tool/CDInfosToolTest.java Mon Aug 24 20:16:37 2009
@@ -45,9 +45,8 @@
 
   private Descriptors randomDescriptors(int nbattributes, double numRate, double catRate) {
     char[] descriptors = new char[nbattributes];
-    double rnd;
     for (int index = 0; index < nbattributes; index++) {
-      rnd = rng.nextDouble();
+      double rnd = rng.nextDouble();
       if (rnd < numRate) {
         // numerical attribute
         descriptors[index] = 'N';
@@ -93,12 +92,10 @@
   private void randomDataset(FileSystem fs, Path input, Descriptors descriptors,
       Object[][] descriptions) throws IOException {
     int nbfiles = rng.nextInt(20) + 1;
-    FSDataOutputStream out;
-    BufferedWriter writer;
 
     for (int floop = 0; floop < nbfiles; floop++) {
-      out = fs.create(new Path(input, "file." + floop));
-      writer = new BufferedWriter(new OutputStreamWriter(out));
+      FSDataOutputStream out = fs.create(new Path(input, "file." + floop));
+      BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(out));
 
       int nblines = rng.nextInt(200) + 1;
       for (int line = 0; line < nblines; line++) {
@@ -173,13 +170,13 @@
 
     // Start the tool
     List<String> result = new ArrayList<String>();
-    int rindex=0;
     CDInfosTool.gatherInfos(descriptors, inpath, result);
 
     // check the results
     Collection<String> target = new ArrayList<String>();
 
     assertEquals(nbNonIgnored(descriptors), result.size());
+    int rindex = 0;
     for (int index = 0; index < nbattrs; index++) {
       if (descriptors.isIgnored(index)) {
         continue;

Modified: lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/tool/ToolMapperTest.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/tool/ToolMapperTest.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/tool/ToolMapperTest.java (original)
+++ lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/tool/ToolMapperTest.java Mon Aug 24 20:16:37 2009
@@ -34,10 +34,10 @@
     ToolMapper mapper = new ToolMapper();
 
     // no attribute is ignored
-    String dataline = "A1, A2, A3, A4, A5, A6";
     char[] descriptors = { 'N', 'N', 'C', 'C', 'N', 'N' };
 
     mapper.configure(descriptors);
+    String dataline = "A1, A2, A3, A4, A5, A6";
     value.set(dataline);
     mapper.map(key, value, output, null);
 
@@ -57,10 +57,10 @@
     ToolMapper mapper = new ToolMapper();
 
     // no attribute is ignored
-    String dataline = "A1, I, A3, I, I, A6";
     char[] descriptors = { 'N', 'I', 'C', 'I', 'I', 'N' };
 
     mapper.configure(descriptors);
+    String dataline = "A1, I, A3, I, I, A6";
     value.set(dataline);
     mapper.map(key, value, output, null);
 

Modified: lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/utils/MockDataSet.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/utils/MockDataSet.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/utils/MockDataSet.java (original)
+++ lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/utils/MockDataSet.java Mon Aug 24 20:16:37 2009
@@ -29,11 +29,11 @@
  */
 public class MockDataSet {
 
-  private Random rng;
+  private final Random rng;
 
-  private int maxnba;
+  private final int maxnba;
 
-  private DataSet dataset;
+  private final DataSet dataset;
 
   /**
    * 

Modified: lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/utils/RandomRule.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/utils/RandomRule.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/utils/RandomRule.java (original)
+++ lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/utils/RandomRule.java Mon Aug 24 20:16:37 2009
@@ -38,6 +38,7 @@
     this.rng = rng;
   }
 
+  @Override
   public int classify(DataLine dl) {
     int label = dl.getLabel();
     int prediction = rng.nextInt(2);

Modified: lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/utils/RandomRuleResults.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/utils/RandomRuleResults.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/utils/RandomRuleResults.java (original)
+++ lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/utils/RandomRuleResults.java Mon Aug 24 20:16:37 2009
@@ -22,10 +22,13 @@
 import java.util.HashMap;
 import java.util.Map;
 
-public class RandomRuleResults {
+public final class RandomRuleResults {
 
   private static final Map<Integer, CDFitness> results = new HashMap<Integer, CDFitness>();
 
+  private RandomRuleResults() {
+  }
+
   public static synchronized void addResult(int ruleid, CDFitness fit) {
     CDFitness f = results.get(ruleid);
     if (f == null)

Modified: lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/SequenceFileDumper.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/SequenceFileDumper.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/SequenceFileDumper.java (original)
+++ lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/SequenceFileDumper.java Mon Aug 24 20:16:37 2009
@@ -89,7 +89,7 @@
         FileSystem fs = FileSystem.get(path.toUri(), conf);
         SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf);
 
-        Writer writer = null;
+        Writer writer;
         if (cmdLine.hasOption(outputOpt)) {
           writer = new FileWriter(cmdLine.getValue(outputOpt).toString());
         } else {
@@ -102,11 +102,11 @@
           sub = Integer.parseInt(cmdLine.getValue(substringOpt).toString());
         }
         boolean countOnly = cmdLine.hasOption(countOpt);
-        long count = 0;
         Writable key = (Writable) reader.getKeyClass().newInstance();
         Writable value = (Writable) reader.getValueClass().newInstance();
         writer.append("Key class: ").append(String.valueOf(reader.getKeyClass())).append(" Value Class: ").append(String.valueOf(value.getClass())).append(StringUtil.LINE_SEP);
         writer.flush();
+        long count = 0;
         if (countOnly == false) {
           while (reader.next(key, value)) {
             writer.append("Key: ").append(String.valueOf(key));

Modified: lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/clustering/ClusterDumper.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/clustering/ClusterDumper.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/clustering/ClusterDumper.java (original)
+++ lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/clustering/ClusterDumper.java Mon Aug 24 20:16:37 2009
@@ -40,9 +40,7 @@
 import org.slf4j.LoggerFactory;
 
 import java.io.BufferedReader;
-import java.io.BufferedWriter;
 import java.io.File;
-import java.io.FileNotFoundException;
 import java.io.FileReader;
 import java.io.FileWriter;
 import java.io.IOException;
@@ -56,11 +54,13 @@
 import java.util.LinkedList;
 import java.util.List;
 import java.util.Map;
+import java.util.regex.Pattern;
 
 public final class ClusterDumper {
 
   private static final Logger log = LoggerFactory.getLogger(ClusterDumper.class);
   private static final String LINE_SEP = System.getProperty("line.separator");
+  private static final Pattern TAB_PATTERN = Pattern.compile("\t");
 
   private ClusterDumper() {
   }
@@ -110,14 +110,14 @@
         JobClient client = new JobClient();
         JobConf conf = new JobConf(Job.class);
         client.setConf(conf);
-        Map<String, List<String>> clusterIdToPoints = null;
+        Map<String, List<String>> clusterIdToPoints;
         if (cmdLine.hasOption(pointsOpt)) {
           //read in the points
           clusterIdToPoints = readPoints(cmdLine.getValue(pointsOpt).toString(), conf);
         } else {
           clusterIdToPoints = Collections.emptyMap();
         }
-        Writer writer = null;
+        Writer writer;
         if (cmdLine.hasOption(outputOpt)){
           writer = new FileWriter(cmdLine.getValue(outputOpt).toString());
         } else {
@@ -229,12 +229,12 @@
       result.add("dummyentry");
     }
     
-    String line = null;
+    String line;
     while ((line = reader.readLine()) != null) {
       if (line.startsWith("#")) {
         continue;
       }
-      String[] tokens = line.split("\t");
+      String[] tokens = TAB_PATTERN.split(line);
       if (tokens.length < 3) {
         continue;
       }
@@ -244,11 +244,11 @@
     return result;
   }
 
-  class TermIndexWeight {
+  static class TermIndexWeight {
     public int index = -1;
     public double weight = 0;
     
-    public TermIndexWeight(int index, double weight) {
+    TermIndexWeight(int index, double weight) {
       this.index = index;
       this.weight = weight;
     }    
@@ -261,7 +261,7 @@
     Iterator<Vector.Element> iter = vector.iterateNonZero();
       while (iter.hasNext()) {
         Vector.Element elt = iter.next();     
-        vectorTerms.add(new ClusterDumper().new TermIndexWeight(elt.index(), elt.get()));        
+        vectorTerms.add(new TermIndexWeight(elt.index(), elt.get()));
       }
       
       // Sort results in reverse order (ie weight in descending order)

Modified: lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/strings/StringUtil.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/strings/StringUtil.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/strings/StringUtil.java (original)
+++ lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/strings/StringUtil.java Mon Aug 24 20:16:37 2009
@@ -1,11 +1,23 @@
-package org.apache.mahout.utils.strings;
-
-
 /**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
  *
+ *     http://www.apache.org/licenses/LICENSE-2.0
  *
- **/
-public class StringUtil {
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.utils.strings;
+
+public interface StringUtil {
 
-  public static final String LINE_SEP = System.getProperty("line.separator");
+  String LINE_SEP = System.getProperty("line.separator");
 }

Modified: lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/SequenceFileVectorIterable.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/SequenceFileVectorIterable.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/SequenceFileVectorIterable.java (original)
+++ lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/SequenceFileVectorIterable.java Mon Aug 24 20:16:37 2009
@@ -31,10 +31,9 @@
  * <p/>
  * The key is any {@link org.apache.hadoop.io.Writable} and the value is a {@link org.apache.mahout.matrix.Vector}.
  * It can handle any class that implements Vector as long as it has a no-arg constructor.
- *
- **/
+ */
 public class SequenceFileVectorIterable implements VectorIterable {
-  private SequenceFile.Reader reader;
+  private final SequenceFile.Reader reader;
   private boolean transpose = false;
 
   public SequenceFileVectorIterable(SequenceFile.Reader reader) {
@@ -58,8 +57,8 @@
   }
 
   public class SeqFileIterator implements Iterator<Vector> {
-    private Writable key;
-    private Writable value;
+    private final Writable key;
+    private final Writable value;
 
     private SeqFileIterator() throws IllegalAccessException, InstantiationException {
       if (transpose == false){

Modified: lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/TF.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/TF.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/TF.java (original)
+++ lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/TF.java Mon Aug 24 20:16:37 2009
@@ -1,4 +1,3 @@
-package org.apache.mahout.utils.vectors;
 /**
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
@@ -16,15 +15,11 @@
  * limitations under the License.
  */
 
-
-import org.apache.lucene.search.DefaultSimilarity;
-import org.apache.lucene.search.Similarity;
-
+package org.apache.mahout.utils.vectors;
 
 /**
  * {@link org.apache.mahout.utils.vectors.Weight} based on term frequency only 
- *
- **/
+ */
 public class TF implements Weight {
 
   @Override

Modified: lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/TFIDF.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/TFIDF.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/TFIDF.java (original)
+++ lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/TFIDF.java Mon Aug 24 20:16:37 2009
@@ -1,4 +1,3 @@
-package org.apache.mahout.utils.vectors;
 /**
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
@@ -16,15 +15,11 @@
  * limitations under the License.
  */
 
+package org.apache.mahout.utils.vectors;
 
 import org.apache.lucene.search.DefaultSimilarity;
 import org.apache.lucene.search.Similarity;
 
-
-/**
- *
- *
- **/
 public class TFIDF implements Weight {
 
   private Similarity sim = new DefaultSimilarity();

Modified: lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/TermEntry.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/TermEntry.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/TermEntry.java (original)
+++ lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/TermEntry.java Mon Aug 24 20:16:37 2009
@@ -1,4 +1,3 @@
-package org.apache.mahout.utils.vectors;
 /**
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
@@ -16,15 +15,12 @@
  * limitations under the License.
  */
 
+package org.apache.mahout.utils.vectors;
 
-/**
- *
- *
- **/
 public class TermEntry {
-  public String term;
-  public int termIdx;
-  public int docFreq;
+  public final String term;
+  public final int termIdx;
+  public final int docFreq;
 
   public TermEntry(String term, int termIdx, int docFreq) {
     this.term = term;

Modified: lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/TermInfo.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/TermInfo.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/TermInfo.java (original)
+++ lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/TermInfo.java Mon Aug 24 20:16:37 2009
@@ -1,4 +1,3 @@
-package org.apache.mahout.utils.vectors;
 /**
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
@@ -16,12 +15,10 @@
  * limitations under the License.
  */
 
+package org.apache.mahout.utils.vectors;
+
 import java.util.Iterator;
 
-/**
- *
- *
- **/
 public interface TermInfo {
 
   int totalTerms(String field);

Modified: lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/VectorDumper.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/VectorDumper.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/VectorDumper.java (original)
+++ lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/VectorDumper.java Mon Aug 24 20:16:37 2009
@@ -46,8 +46,7 @@
  * Can read in a {@link org.apache.hadoop.io.SequenceFile} of {@link org.apache.mahout.matrix.Vector}s
  * and dump out the results using {@link org.apache.mahout.matrix.Vector#asFormatString()} to either the console
  * or to a file.
- *
- **/
+ */
 public final class VectorDumper {
 
   private static final Logger log = LoggerFactory.getLogger(VectorDumper.class);
@@ -97,7 +96,7 @@
         FileSystem fs = FileSystem.get(path.toUri(), conf);
         SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf);
         SequenceFileVectorIterable vectorIterable = new SequenceFileVectorIterable(reader, cmdLine.hasOption(vectorAsKeyOpt));
-        Writer writer = null;
+        Writer writer;
         if (cmdLine.hasOption(outputOpt)) {
           writer = new FileWriter(cmdLine.getValue(outputOpt).toString());
         } else {

Modified: lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/VectorIterable.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/VectorIterable.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/VectorIterable.java (original)
+++ lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/VectorIterable.java Mon Aug 24 20:16:37 2009
@@ -1,4 +1,3 @@
-package org.apache.mahout.utils.vectors;
 /**
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
@@ -16,12 +15,9 @@
  * limitations under the License.
  */
 
-import org.apache.mahout.matrix.Vector;
+package org.apache.mahout.utils.vectors;
 
+import org.apache.mahout.matrix.Vector;
 
-/**
- *
- *
- **/
 public interface VectorIterable extends Iterable<Vector>{
 }

Modified: lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/Weight.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/Weight.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/Weight.java (original)
+++ lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/Weight.java Mon Aug 24 20:16:37 2009
@@ -1,4 +1,3 @@
-package org.apache.mahout.utils.vectors;
 /**
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
@@ -16,11 +15,8 @@
  * limitations under the License.
  */
 
+package org.apache.mahout.utils.vectors;
 
-/**
- *
- *
- **/
 public interface Weight {
 
   /**

Modified: lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/arff/ARFFModel.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/arff/ARFFModel.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/arff/ARFFModel.java (original)
+++ lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/arff/ARFFModel.java Mon Aug 24 20:16:37 2009
@@ -1,4 +1,3 @@
-package org.apache.mahout.utils.vectors.arff;
 /**
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
@@ -16,6 +15,8 @@
  * limitations under the License.
  */
 
+package org.apache.mahout.utils.vectors.arff;
+
 import java.util.Map;
 import java.text.DateFormat;
 import java.text.SimpleDateFormat;
@@ -25,15 +26,14 @@
  * An interface for representing an ARFFModel.  Implementations can decide on the best approach
  * for storing the model, as some approaches will be fine for smaller files, while larger
  * ones may require a better implementation.
- *
- **/
+ */
 public interface ARFFModel {
-  public static final DateFormat DEFAULT_DATE_FORMAT = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss");
-  public static final String ARFF_SPARSE = "{";//indicates the vector is sparse
-  public static final String ARFF_COMMENT = "%";
-  public static final String ATTRIBUTE = "@attribute";
-  public static final String DATA = "@data";
-  public static final String RELATION = "@relation";
+  DateFormat DEFAULT_DATE_FORMAT = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss");
+  String ARFF_SPARSE = "{";//indicates the vector is sparse
+  String ARFF_COMMENT = "%";
+  String ATTRIBUTE = "@attribute";
+  String DATA = "@data";
+  String RELATION = "@relation";
 
 
   String getRelation();

Modified: lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/arff/ARFFType.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/arff/ARFFType.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/arff/ARFFType.java (original)
+++ lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/arff/ARFFType.java Mon Aug 24 20:16:37 2009
@@ -1,9 +1,26 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
 package org.apache.mahout.utils.vectors.arff;
 
 public enum ARFFType {
   NUMERIC("numeric"), NOMINAL("{"), DATE("date"), STRING("string");
 
-  private String indicator;
+  private final String indicator;
   ARFFType(String indicator) {
     this.indicator = indicator;
   }
@@ -12,8 +29,6 @@
     return indicator;
   }
 
-  
-
   public String getLabel(String line) {
     int idx = line.indexOf(indicator);
     return line.substring(ARFFModel.ATTRIBUTE.length(),

Modified: lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/arff/ARFFVectorIterable.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/arff/ARFFVectorIterable.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/arff/ARFFVectorIterable.java (original)
+++ lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/arff/ARFFVectorIterable.java Mon Aug 24 20:16:37 2009
@@ -1,4 +1,3 @@
-package org.apache.mahout.utils.vectors.arff;
 /**
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
@@ -16,6 +15,8 @@
  * limitations under the License.
  */
 
+package org.apache.mahout.utils.vectors.arff;
+
 import org.apache.mahout.matrix.DenseVector;
 import org.apache.mahout.matrix.SparseVector;
 import org.apache.mahout.matrix.Vector;
@@ -33,10 +34,11 @@
 import java.text.SimpleDateFormat;
 import java.text.DateFormat;
 import java.util.Iterator;
+import java.util.regex.Pattern;
 
 
 /**
- * Read in ARFF (http://www.cs.waikato.ac.nz/~ml/weka/arff.html) and create {@link org.apache.mahout.matrix.Vector}s
+ * Read in ARFF (http://www.cs.waikato.ac.nz/~ml/weka/arff.html) and create {@link Vector}s
  * <p/>
  * Attribute type handling:
  * <ul>
@@ -54,9 +56,10 @@
  */
 public class ARFFVectorIterable implements VectorIterable {
 
-  protected BufferedReader buff;
-  protected boolean inData;
-  protected ARFFModel model;
+  private final BufferedReader buff;
+  private final ARFFModel model;
+  private static final Pattern COMMA_PATTERN = Pattern.compile(",");
+  private static final Pattern SPACE_PATTERN = Pattern.compile(" ");
 
 
   public ARFFVectorIterable(File file, ARFFModel model) throws IOException {
@@ -78,47 +81,47 @@
       buff = new BufferedReader(reader);
     }
     //grab the attributes, then start the iterator at the first line of data
-    String line = null;
-    int labelNumber = 0;
-    inData = false;
     this.model = model;
 
+    int labelNumber = 0;
+    String line;
+    boolean inData = false;
     while ((line = buff.readLine()) != null) {
       line = line.trim();
       String lower = line.toLowerCase();
-      ARFFType type;
-      Integer labelNumInt = new Integer(labelNumber);
+      Integer labelNumInt = labelNumber;
       if (lower.startsWith(ARFFModel.ARFF_COMMENT)) {
         continue;
       } else if (lower.startsWith(ARFFModel.RELATION)) {
         model.setRelation(line.substring(ARFFModel.RELATION.length()).trim());
       } else if (lower.startsWith(ARFFModel.ATTRIBUTE)) {
         String label;
-        if (lower.indexOf(ARFFType.NUMERIC.getIndicator()) != -1) {
+        ARFFType type;
+        if (lower.contains(ARFFType.NUMERIC.getIndicator())) {
           label = ARFFType.NUMERIC.getLabel(lower);
           type = ARFFType.NUMERIC;
-        } else if (lower.indexOf(ARFFType.STRING.getIndicator()) != -1) {
+        } else if (lower.contains(ARFFType.STRING.getIndicator())) {
           label = ARFFType.STRING.getLabel(lower);
           type = ARFFType.STRING;
           //TODO: create a map so we know which
 
-        } else if (lower.indexOf(ARFFType.NOMINAL.getIndicator()) != -1) {
+        } else if (lower.contains(ARFFType.NOMINAL.getIndicator())) {
           label = ARFFType.NOMINAL.getLabel(lower);
           type = ARFFType.NOMINAL;
           //@ATTRIBUTE class        {Iris-setosa,Iris-versicolor,Iris-virginica}
           int classIdx = lower.indexOf(ARFFType.NOMINAL.getIndicator());
-          String [] classes = line.substring(classIdx + 1, line.length() - 1).split(",");
+          String [] classes = COMMA_PATTERN.split(line.substring(classIdx + 1, line.length() - 1));
           for (int i = 0; i < classes.length; i++) {
             model.addNominal(label, classes[i].trim(), i);
           }
 
-        } else if (lower.indexOf(ARFFType.DATE.getIndicator()) != -1) {
+        } else if (lower.contains(ARFFType.DATE.getIndicator())) {
           label = ARFFType.DATE.getLabel(lower);
           type = ARFFType.DATE;
           //TODO: DateFormatter map
           DateFormat format = ARFFModel.DEFAULT_DATE_FORMAT;
           int idx = lower.indexOf(ARFFType.DATE.getIndicator());
-          String[] split = line.split(" ");
+          String[] split = SPACE_PATTERN.split(line);
           if (split.length >= 4) {//we have a date format
             String formStr = line.substring(idx + ARFFType.DATE.getIndicator().length()).trim();
             if (formStr.startsWith("\"")) {
@@ -161,7 +164,7 @@
       try {
         while ((line = buff.readLine()) != null) {
           line = line.trim();
-          if (line.equals("") == false && line.startsWith(ARFFModel.ARFF_COMMENT) == false) {
+          if (line.length() > 0 && line.startsWith(ARFFModel.ARFF_COMMENT) == false) {
             break;
           }
         }
@@ -176,19 +179,19 @@
 
     @Override
     public Vector next() {
-      Vector result = null;
+      Vector result;
       if (line.startsWith(ARFFModel.ARFF_SPARSE)) {
         line = line.substring(1, line.length() - 1);
-        String[] splits = line.split(",");
+        String[] splits = COMMA_PATTERN.split(line);
         result = new SparseVector(model.getLabelSize());
-        for (int i = 0; i < splits.length; i++) {
-          String[] data = splits[i].split(" ");//first is index, second is
+        for (String split : splits) {
+          String[] data = SPACE_PATTERN.split(split); // first is index, second is
           int idx = Integer.parseInt(data[0]);
           result.setQuick(idx, model.getValue(data[1], idx));
         }
       } else {
         result = new DenseVector(model.getLabelSize());
-        String[] splits = line.split(",");
+        String[] splits = COMMA_PATTERN.split(line);
         for (int i = 0; i < splits.length; i++) {
           result.setQuick(i, model.getValue(splits[i], i));
         }

Modified: lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/arff/Driver.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/arff/Driver.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/arff/Driver.java (original)
+++ lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/arff/Driver.java Mon Aug 24 20:16:37 2009
@@ -1,4 +1,3 @@
-package org.apache.mahout.utils.vectors.arff;
 /**
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
@@ -16,6 +15,8 @@
  * limitations under the License.
  */
 
+package org.apache.mahout.utils.vectors.arff;
+
 import org.apache.commons.cli2.CommandLine;
 import org.apache.commons.cli2.Group;
 import org.apache.commons.cli2.Option;
@@ -49,12 +50,11 @@
 import java.util.Map;
 
 
-/**
- *
- *
- **/
 public class Driver {
-  private transient static Logger log = LoggerFactory.getLogger(Driver.class);
+  private static final Logger log = LoggerFactory.getLogger(Driver.class);
+
+  private Driver() {
+  }
 
   public static void main(String[] args) throws IOException {
     DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
@@ -125,9 +125,7 @@
             }
           });
 
-          for (int i = 0; i < files.length; i++) {
-            File file = files[i];
-
+          for (File file : files) {
             writeFile(outWriter, outDir, file, maxDocs, model);
           }
         } else {
@@ -154,7 +152,7 @@
     ARFFModel model = new MapBackedARFFModel(arffModel.getWords(), arffModel.getWordCount() + 1,
             arffModel.getNominalMap());
     ARFFVectorIterable iteratable = new ARFFVectorIterable(file, model);
-    String outFile = outDir + "/" + file.getName() + ".mvc";
+    String outFile = outDir + '/' + file.getName() + ".mvc";
 
     VectorWriter vectorWriter;
     if (outWriter != null) {
@@ -174,12 +172,11 @@
   }
 
   private static VectorWriter getSeqFileWriter(String outFile) throws IOException {
-    VectorWriter sfWriter;
     Path path = new Path(outFile);
     Configuration conf = new Configuration();
     FileSystem fs = FileSystem.get(conf);
     SequenceFile.Writer seqWriter = SequenceFile.createWriter(fs, conf, path, LongWritable.class, SparseVector.class);
-    sfWriter = new SequenceFileVectorWriter(seqWriter);
+    VectorWriter sfWriter = new SequenceFileVectorWriter(seqWriter);
     return sfWriter;
   }
 

Modified: lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/arff/MapBackedARFFModel.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/arff/MapBackedARFFModel.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/arff/MapBackedARFFModel.java (original)
+++ lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/arff/MapBackedARFFModel.java Mon Aug 24 20:16:37 2009
@@ -1,4 +1,3 @@
-package org.apache.mahout.utils.vectors.arff;
 /**
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
@@ -16,29 +15,34 @@
  * limitations under the License.
  */
 
+package org.apache.mahout.utils.vectors.arff;
+
 import java.text.DateFormat;
 import java.text.ParseException;
 import java.util.Date;
 import java.util.HashMap;
 import java.util.Map;
 import java.util.Collections;
+import java.util.regex.Pattern;
 
 
 /**
- * Holds ARFF information in {@link java.util.Map}.
+ * Holds ARFF information in {@link Map}.
  */
 public class MapBackedARFFModel implements ARFFModel {
 
-  protected long wordCount = 1;
+  private static final Pattern QUOTE_PATTERN = Pattern.compile("\"");
 
-  protected String relation;
+  private long wordCount = 1;
 
-  private Map<String, Integer> labelBindings;
-  private Map<Integer, String> idxLabel;
-  private Map<Integer, ARFFType> typeMap; //key is the vector index, value is the type
-  private Map<Integer, DateFormat> dateMap;
-  private Map<String, Map<String, Integer>> nominalMap;
-  private Map<String, Long> words;
+  private String relation;
+
+  private final Map<String, Integer> labelBindings;
+  private final Map<Integer, String> idxLabel;
+  private final Map<Integer, ARFFType> typeMap; //key is the vector index, value is the type
+  private final Map<Integer, DateFormat> dateMap;
+  private final Map<String, Map<String, Integer>> nominalMap;
+  private final Map<String, Long> words;
 
   public MapBackedARFFModel() {
     this(new HashMap<String, Long>(), 1, new HashMap<String, Map<String, Integer>>());
@@ -55,10 +59,12 @@
 
   }
 
+  @Override
   public String getRelation() {
     return relation;
   }
 
+  @Override
   public void setRelation(String relation) {
     this.relation = relation;
   }
@@ -70,11 +76,12 @@
    * @param idx  The position in the ARFF data
    * @return A double representing the data
    */
+  @Override
   public double getValue(String data, int idx) {
-    double result = 0;
     ARFFType type = typeMap.get(idx);
-    data = data.replaceAll("\"", "");
+    data = QUOTE_PATTERN.matcher(data).replaceAll("");
     data = data.trim();
+    double result = 0.0;
     switch (type) {
       case NUMERIC: {
         result = processNumeric(data);
@@ -125,31 +132,31 @@
    */
   //Not sure how scalable this is going to be
   protected double processString(String data) {
-    double result;
-    data = data.replaceAll("\"", "");
+    data = QUOTE_PATTERN.matcher(data).replaceAll("");
     //map it to an long
     Long theLong = words.get(data);
     if (theLong == null) {
       theLong = wordCount++;
       words.put(data, theLong);
     }
-    result = theLong;
-    return result;
+    return theLong;
   }
 
-  protected double processNumeric(String data) {
+  protected static double processNumeric(String data) {
     return Double.parseDouble(data);
   }
 
   protected double processDate(String data, int idx) {
-    double result;
     DateFormat format = dateMap.get(idx);
     if (format == null) {
       format = DEFAULT_DATE_FORMAT;
     }
-    Date date = null;
+    double result;
     try {
-      date = format.parse(data);
+      Date date;
+      synchronized (format) {
+        date = format.parse(data);
+      }
       result = date.getTime();// hmmm, what kind of loss casting long to double?
     } catch (ParseException e) {
       throw new RuntimeException(e);
@@ -161,6 +168,7 @@
    * The vector attributes (labels in Mahout speak), unmodifiable
    * @return the map
    */
+  @Override
   public Map<String, Integer> getLabelBindings() {
     return Collections.unmodifiableMap(labelBindings);
   }
@@ -185,6 +193,7 @@
    * Map nominals to ids.  Should only be modified by calling {@link ARFFModel#addNominal(String, String, int)}
    * @return the map
    */
+  @Override
   public Map<String, Map<String, Integer>> getNominalMap() {
     return nominalMap;
   }
@@ -193,14 +202,17 @@
    * Immutable map of words to the long id used for those words
    * @return The map
    */
+  @Override
   public Map<String, Long> getWords() {
     return words;
   }
 
+  @Override
   public Integer getNominalValue(String label, String nominal){
     return nominalMap.get(label).get(nominal);
   }
 
+  @Override
   public void addNominal(String label, String nominal, int idx) {
     Map<String, Integer> noms = nominalMap.get(label);
     if (noms == null) {
@@ -210,27 +222,33 @@
     noms.put(nominal, idx);
   }
 
+  @Override
   public DateFormat getDateFormat(Integer idx){
     return dateMap.get(idx);
   }
 
+  @Override
   public void addDateFormat(Integer idx, DateFormat format) {
     dateMap.put(idx, format);
   }
 
+  @Override
   public Integer getLabelIndex(String label){
     return labelBindings.get(label);
   }
 
+  @Override
   public void addLabel(String label, Integer idx) {
     labelBindings.put(label, idx);
     idxLabel.put(idx, label);
   }
 
+  @Override
   public ARFFType getARFFType(Integer idx){
     return typeMap.get(idx);
   }
 
+  @Override
   public void addType(Integer idx, ARFFType type) {
     typeMap.put(idx, type);
   }
@@ -239,10 +257,12 @@
    * The count of the number of words seen
    * @return the count
    */
+  @Override
   public long getWordCount() {
     return wordCount;
   }
 
+  @Override
   public int getLabelSize() {
     return labelBindings.size();
   }

Modified: lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/JWriterTermInfoWriter.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/JWriterTermInfoWriter.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/JWriterTermInfoWriter.java (original)
+++ lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/JWriterTermInfoWriter.java Mon Aug 24 20:16:37 2009
@@ -27,13 +27,12 @@
 
 /**
  * Write ther TermInfo out to a {@link java.io.Writer}
- *
- **/
+ */
 public class JWriterTermInfoWriter implements TermInfoWriter {
 
-  protected Writer writer;
-  protected String delimiter;
-  protected String field;
+  private final Writer writer;
+  private final String delimiter;
+  private final String field;
 
   public JWriterTermInfoWriter(Writer writer, String delimiter, String field) {
     this.writer = writer;

Modified: lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/JWriterVectorWriter.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/JWriterVectorWriter.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/JWriterVectorWriter.java (original)
+++ lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/JWriterVectorWriter.java Mon Aug 24 20:16:37 2009
@@ -24,7 +24,7 @@
 import java.io.Writer;
 
 public class JWriterVectorWriter implements VectorWriter {
-  protected Writer writer;
+  private final Writer writer;
 
   public JWriterVectorWriter(Writer writer) {
     this.writer = writer;

Modified: lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/SequenceFileVectorWriter.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/SequenceFileVectorWriter.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/SequenceFileVectorWriter.java (original)
+++ lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/SequenceFileVectorWriter.java Mon Aug 24 20:16:37 2009
@@ -27,10 +27,9 @@
 
 /**
  * Closes the writer when done
- *
- **/
+ */
 public class SequenceFileVectorWriter implements VectorWriter {
-  protected SequenceFile.Writer writer;
+  private final SequenceFile.Writer writer;
 
   public SequenceFileVectorWriter(SequenceFile.Writer writer) {
     this.writer = writer;

Modified: lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/lucene/CachedTermInfo.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/lucene/CachedTermInfo.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/lucene/CachedTermInfo.java (original)
+++ lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/lucene/CachedTermInfo.java Mon Aug 24 20:16:37 2009
@@ -1,4 +1,3 @@
-package org.apache.mahout.utils.vectors.lucene;
 /**
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
@@ -16,6 +15,8 @@
  * limitations under the License.
  */
 
+package org.apache.mahout.utils.vectors.lucene;
+
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.TermEnum;
 import org.apache.lucene.index.Term;
@@ -24,27 +25,26 @@
 
 import java.util.Map;
 import java.util.Iterator;
-import java.util.HashMap;
 import java.util.LinkedHashMap;
 import java.io.IOException;
 
 
 /**
  * Caches TermEntries from a single field.  Materializes all values in the TermEnum to memory (much like FieldCache)
- *
- **/
+ */
 public class CachedTermInfo implements TermInfo {
 
-  Map<String, TermEntry> termEntries;
-  String field;
+  private final Map<String, TermEntry> termEntries;
+  private final String field;
+
   public CachedTermInfo(IndexReader reader, String field, int minDf, int maxDfPercent) throws IOException {
     this.field = field;
     TermEnum te = reader.terms(new Term(field, ""));
-    int count = 0;
     int numDocs = reader.numDocs();
     double percent = numDocs * maxDfPercent / 100.0;
     //Should we use a linked hash map so that we know terms are in order?
     termEntries = new LinkedHashMap<String, TermEntry>();
+    int count = 0;
     do {
       Term term = te.term();
       if (term == null || term.field().equals(field) == false){

Modified: lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/lucene/Driver.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/lucene/Driver.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/lucene/Driver.java (original)
+++ lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/lucene/Driver.java Mon Aug 24 20:16:37 2009
@@ -1,4 +1,3 @@
-package org.apache.mahout.utils.vectors.lucene;
 /**
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
@@ -16,6 +15,8 @@
  * limitations under the License.
  */
 
+package org.apache.mahout.utils.vectors.lucene;
+
 import org.apache.commons.cli2.CommandLine;
 import org.apache.commons.cli2.Group;
 import org.apache.commons.cli2.Option;
@@ -53,13 +54,11 @@
 import java.io.OutputStreamWriter;
 import java.nio.charset.Charset;
 
-
-/**
- *
- *
- **/
 public class Driver {
-  private transient static Logger log = LoggerFactory.getLogger(Driver.class);
+  private static final Logger log = LoggerFactory.getLogger(Driver.class);
+
+  private Driver() {
+  }
 
   public static void main(String[] args) throws IOException {
     DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
@@ -139,7 +138,7 @@
           }
           Directory dir = FSDirectory.open(file);
           IndexReader reader = IndexReader.open(dir, true);
-          Weight weight = null;
+          Weight weight;
           if (cmdLine.hasOption(weightOpt)) {
             String wString = cmdLine.getValue(weightOpt).toString();
             if (wString.equalsIgnoreCase("tf")) {
@@ -163,11 +162,9 @@
           }
           TermInfo termInfo = new CachedTermInfo(reader, field, minDf, maxDFPercent);
           VectorMapper mapper = new TFDFMapper(reader, weight, termInfo);
-          LuceneIterable iterable = null;
-          String power = null;
           double norm = -1;
           if (cmdLine.hasOption(powerOpt)) {
-            power = cmdLine.getValue(powerOpt).toString();
+            String power = cmdLine.getValue(powerOpt).toString();
             if (power.equals("INF")) {
               norm = Double.POSITIVE_INFINITY;
             } else {
@@ -178,6 +175,7 @@
           if (cmdLine.hasOption(idFieldOpt)) {
             idField = cmdLine.getValue(idFieldOpt).toString();
           }
+          LuceneIterable iterable;
           if (norm == LuceneIterable.NO_NORMALIZING) {
             iterable = new LuceneIterable(reader, idField, field, mapper, LuceneIterable.NO_NORMALIZING);
           } else {
@@ -221,15 +219,13 @@
   }
 
   private static VectorWriter getSeqFileWriter(String outFile) throws IOException {
-    VectorWriter sfWriter;
     Path path = new Path(outFile);
     Configuration conf = new Configuration();
     FileSystem fs = FileSystem.get(conf);
     //TODO: Make this parameter driven
     SequenceFile.Writer seqWriter = SequenceFile.createWriter(fs, conf, path, LongWritable.class, SparseVector.class);
 
-    sfWriter = new SequenceFileVectorWriter(seqWriter);
-    return sfWriter;
+    return new SequenceFileVectorWriter(seqWriter);
   }
 
 

Modified: lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/lucene/LuceneIterable.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/lucene/LuceneIterable.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/lucene/LuceneIterable.java (original)
+++ lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/lucene/LuceneIterable.java Mon Aug 24 20:16:37 2009
@@ -1,4 +1,3 @@
-package org.apache.mahout.utils.vectors.lucene;
 /**
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
@@ -16,6 +15,8 @@
  * limitations under the License.
  */
 
+package org.apache.mahout.utils.vectors.lucene;
+
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.TermDocs;
 import org.apache.lucene.document.FieldSelector;
@@ -27,14 +28,8 @@
 import java.util.Iterator;
 import java.util.Collections;
 
-
-/**
- *
- *
- **/
 public class LuceneIterable implements VectorIterable {
 
-
   private IndexReader indexReader;
   private String field;
   private String idField;
@@ -80,7 +75,7 @@
   }
 
   private class TDIterator implements Iterator<Vector> {
-    private TermDocs termDocs;
+    private final TermDocs termDocs;
 
     private TDIterator() throws IOException {
       //term docs(null) is a better way of iterating all the docs in Lucene
@@ -98,7 +93,7 @@
 
     @Override
     public Vector next() {
-      Vector result = null;
+      Vector result;
       int doc = termDocs.doc();
       //
       try {

Modified: lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/lucene/TFDFMapper.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/lucene/TFDFMapper.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/lucene/TFDFMapper.java (original)
+++ lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/lucene/TFDFMapper.java Mon Aug 24 20:16:37 2009
@@ -1,4 +1,3 @@
-package org.apache.mahout.utils.vectors.lucene;
 /**
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
@@ -16,6 +15,8 @@
  * limitations under the License.
  */
 
+package org.apache.mahout.utils.vectors.lucene;
+
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.TermVectorOffsetInfo;
 import org.apache.mahout.matrix.SparseVector;
@@ -32,14 +33,14 @@
 
   public static final int DEFAULT_CACHE_SIZE = 256;
 
-  protected IndexReader reader;
-  protected Vector vector;
+  private final IndexReader reader; // TODO never used?
+  private Vector vector;
 
-  protected Weight weight;
-  protected int numTerms;
-  protected TermInfo termInfo;
+  private final Weight weight;
+  private int numTerms;
+  private final TermInfo termInfo;
   private String field;
-  private int numDocs;
+  private final int numDocs;
 
   public TFDFMapper(IndexReader reader, Weight weight, TermInfo termInfo) {
     this.reader = reader;
@@ -48,6 +49,7 @@
     this.numDocs = reader.numDocs();
   }
 
+  @Override
   public Vector getVector() {
     return vector;
   }
@@ -62,8 +64,9 @@
   @Override
   public void map(String term, int frequency, TermVectorOffsetInfo[] offsets, int[] positions) {
     TermEntry entry = termInfo.getTermEntry(field, term);
-    if(entry != null)
+    if (entry != null) {
       vector.setQuick(entry.termIdx, weight.calculate(frequency, entry.docFreq, numTerms, numDocs));
+    }
   }
 
   @Override

Modified: lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/lucene/VectorMapper.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/lucene/VectorMapper.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/lucene/VectorMapper.java (original)
+++ lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/lucene/VectorMapper.java Mon Aug 24 20:16:37 2009
@@ -1,4 +1,3 @@
-package org.apache.mahout.utils.vectors.lucene;
 /**
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
@@ -16,14 +15,14 @@
  * limitations under the License.
  */
 
+package org.apache.mahout.utils.vectors.lucene;
+
 import org.apache.lucene.index.TermVectorMapper;
 import org.apache.mahout.matrix.Vector;
 
-
 /**
  * Not thread-safe
- *
- **/
+ */
 public abstract class VectorMapper extends TermVectorMapper {
   /**
    * Can be called after the TermVector has been mapped

Modified: lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/RandomVectorIterable.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/RandomVectorIterable.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/RandomVectorIterable.java (original)
+++ lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/RandomVectorIterable.java Mon Aug 24 20:16:37 2009
@@ -27,10 +27,10 @@
 
 public class RandomVectorIterable implements VectorIterable{
 
-  int numItems = 100;
-  public static enum VectorType {DENSE, SPARSE};
+  private int numItems = 100;
+  public enum VectorType {DENSE, SPARSE}
 
-  VectorType type = VectorType.SPARSE;
+  private VectorType type = VectorType.SPARSE;
 
   public RandomVectorIterable() {
   }
@@ -51,7 +51,7 @@
 
   private class VectIterator implements Iterator<Vector>{
     int count = 0;
-    Random random = new Random();
+    final Random random = new Random();
     @Override
     public boolean hasNext() {
       return count < numItems;
@@ -59,7 +59,7 @@
 
     @Override
     public Vector next() {
-      Vector result = type.equals(VectorType.SPARSE) ? new SparseVector(numItems) : new DenseVector(numItems);
+      Vector result = type == VectorType.SPARSE ? new SparseVector(numItems) : new DenseVector(numItems);
       result.assign(new UnaryFunction(){
         @Override
         public double apply(double arg1) {

Modified: lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/arff/ARFFVectorIterableTest.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/arff/ARFFVectorIterableTest.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/arff/ARFFVectorIterableTest.java (original)
+++ lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/arff/ARFFVectorIterableTest.java Mon Aug 24 20:16:37 2009
@@ -1,4 +1,3 @@
-package org.apache.mahout.utils.vectors.arff;
 /**
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
@@ -16,6 +15,8 @@
  * limitations under the License.
  */
 
+package org.apache.mahout.utils.vectors.arff;
+
 import junit.framework.TestCase;
 import org.apache.mahout.matrix.DenseVector;
 import org.apache.mahout.matrix.SparseVector;
@@ -26,11 +27,6 @@
 import java.util.Iterator;
 import java.util.Map;
 
-
-/**
- *
- *
- **/
 public class ARFFVectorIterableTest extends TestCase {
 
   public void testValues() throws Exception {
@@ -47,30 +43,30 @@
             .append("{0 5,1 23}").append(StringUtil.LINE_SEP);
     ARFFModel model = new MapBackedARFFModel();
     ARFFVectorIterable iterable = new ARFFVectorIterable(builder.toString(), model);
-    assertTrue(iterable.getModel().getRelation() + " is not equal to " + "Mahout", iterable.getModel().getRelation().equals("Mahout") == true);
+    assertEquals("Mahout", iterable.getModel().getRelation());
     Map<String, Integer> bindings = iterable.getModel().getLabelBindings();
     assertNotNull(bindings);
-    assertTrue("bindings Size: " + bindings.size() + " is not: " + 5, bindings.size() == 5);
+    assertEquals(5, bindings.size());
     Iterator<Vector> iter = iterable.iterator();
     assertTrue(iter.hasNext());
     Vector next = iter.next();
     assertNotNull(next);
     assertTrue("Wrong instanceof", next instanceof DenseVector);
-    assertEquals("", next.get(0), 1.0);
-    assertEquals("", next.get(1), 2.0);
+    assertEquals(1.0, next.get(0));
+    assertEquals(2.0, next.get(1));
     assertTrue(iter.hasNext());
     next = iter.next();
     assertNotNull(next);
     assertTrue("Wrong instanceof", next instanceof DenseVector);
-    assertEquals("", next.get(0), 2.0);
-    assertEquals("", next.get(1), 3.0);
+    assertEquals(2.0, next.get(0));
+    assertEquals(3.0, next.get(1));
 
     assertTrue(iter.hasNext());
     next = iter.next();
     assertNotNull(next);
     assertTrue("Wrong instanceof", next instanceof SparseVector);
-    assertEquals("", next.get(0), 5.0);
-    assertEquals("", next.get(1), 23.0);
+    assertEquals(5.0, next.get(0));
+    assertEquals(23.0, next.get(1));
 
     assertFalse(iter.hasNext());
   }
@@ -83,7 +79,7 @@
       assertTrue("Vector is not dense", vector instanceof DenseVector);
       count++;
     }
-    assertTrue(count + " does not equal: " + 10, count == 10);
+    assertEquals(10, count);
   }
 
   public void testSparse() throws Exception {
@@ -94,7 +90,7 @@
       assertTrue("Vector is not dense", vector instanceof SparseVector);
       count++;
     }
-    assertTrue(count + " does not equal: " + 10, count == 10);
+    assertEquals(10, count);
   }
 
   public void testNonNumeric() throws Exception {
@@ -106,23 +102,23 @@
       assertTrue("Vector is not dense", vector instanceof SparseVector);
       count++;
     }
-    assertTrue(count + " does not equal: " + 10, count == 10);
+    assertEquals(10, count);
     Map<String, Map<String, Integer>> nominalMap = iterable.getModel().getNominalMap();
     assertNotNull(nominalMap);
-    assertTrue("nominalMap Size: " + nominalMap.size() + " is not: " + 1, nominalMap.size() == 1);
+    assertEquals(1, nominalMap.size());
     Map<String, Integer> noms = nominalMap.get("bar");
     assertNotNull("nominals for bar are null", noms);
-    assertTrue("noms Size: " + noms.size() + " is not: " + 2, noms.size() == 2);
+    assertEquals(2, noms.size());
     Map<Integer, ARFFType> integerARFFTypeMap = model.getTypeMap();
     assertNotNull("Type map null", integerARFFTypeMap);
-    assertTrue("integerARFFTypeMap Size: " + integerARFFTypeMap.size() + " is not: " + 5, integerARFFTypeMap.size() == 5);
+    assertEquals(5, integerARFFTypeMap.size());
     Map<String, Long> words = model.getWords();
     assertNotNull("words null", words);
-    assertTrue("words Size: " + words.size() + " is not: " + 10, words.size() == 10);
+    assertEquals(10, words.size());
     System.out.println("Words: " + words);
     Map<Integer, DateFormat> integerDateFormatMap = model.getDateMap();
     assertNotNull("date format null", integerDateFormatMap);
-    assertTrue("integerDateFormatMap Size: " + integerDateFormatMap.size() + " is not: " + 1, integerDateFormatMap.size() == 1);
+    assertEquals(1, integerDateFormatMap.size());
 
   }
 
@@ -134,23 +130,23 @@
       assertTrue("Vector is not dense", vector instanceof SparseVector);
       count++;
     }
-    assertTrue(count + " does not equal: " + 10, count == 10);
+    assertEquals(10, count);
     Map<String, Map<String, Integer>> nominalMap = iterable.getModel().getNominalMap();
     assertNotNull(nominalMap);
-    assertTrue("nominalMap Size: " + nominalMap.size() + " is not: " + 1, nominalMap.size() == 1);
+    assertEquals(1, nominalMap.size());
     Map<String, Integer> noms = nominalMap.get("bar");
     assertNotNull("nominals for bar are null", noms);
-    assertTrue("noms Size: " + noms.size() + " is not: " + 2, noms.size() == 2);
+    assertEquals(2, noms.size());
     Map<Integer, ARFFType> integerARFFTypeMap = model.getTypeMap();
     assertNotNull("Type map null", integerARFFTypeMap);
-    assertTrue("integerARFFTypeMap Size: " + integerARFFTypeMap.size() + " is not: " + 5, integerARFFTypeMap.size() == 5);
+    assertEquals(5, integerARFFTypeMap.size());
     Map<String, Long> words = model.getWords();
     assertNotNull("words null", words);
-    assertTrue("words Size: " + words.size() + " is not: " + 10, words.size() == 10);
+    assertEquals(10, words.size());
     System.out.println("Words: " + words);
     Map<Integer, DateFormat> integerDateFormatMap = model.getDateMap();
     assertNotNull("date format null", integerDateFormatMap);
-    assertTrue("integerDateFormatMap Size: " + integerDateFormatMap.size() + " is not: " + 1, integerDateFormatMap.size() == 1);
+    assertEquals(1, integerDateFormatMap.size());
     model = new MapBackedARFFModel(model.getWords(), model.getWordCount(),
             model.getNominalMap());
     iterable = new ARFFVectorIterable(NON_NUMERIC_ARFF2, model);
@@ -161,26 +157,26 @@
     }
     nominalMap = model.getNominalMap();
     assertNotNull(nominalMap);
-    assertTrue("nominalMap Size: " + nominalMap.size() + " is not: " + 2, nominalMap.size() == 2);
+    assertEquals(2, nominalMap.size());
     noms = nominalMap.get("test");
     assertNotNull("nominals for bar are null", noms);
-    assertTrue("noms Size: " + noms.size() + " is not: " + 2, noms.size() == 2);
+    assertEquals(2, noms.size());
   }
 
 
-  public static final String SAMPLE_DENSE_ARFF = "   % Comments\n" +
+  private static final String SAMPLE_DENSE_ARFF = "   % Comments\n" +
           "   % \n" +
           "   % Comments go here" +
           "   % \n" +
           "   @RELATION Mahout\n" +
-          "\n" +
+          '\n' +
           "   @ATTRIBUTE foo  NUMERIC\n" +
           "   @ATTRIBUTE bar   NUMERIC\n" +
           "   @ATTRIBUTE hockey  NUMERIC\n" +
           "   @ATTRIBUTE football   NUMERIC\n" +
           "  \n" +
-          "\n" +
-          "\n" +
+          '\n' +
+          '\n' +
           "   @DATA\n" +
           "   23.1,3.23,1.2,0.2\n" +
           "   2.9,3.0,1.2,0.2\n" +
@@ -194,20 +190,20 @@
           "   2.9,3.1,1.23,0.1\n";
 
 
-  public static final String SAMPLE_SPARSE_ARFF = "   % Comments\n" +
+  private static final String SAMPLE_SPARSE_ARFF = "   % Comments\n" +
           "   % \n" +
           "   % Comments go here" +
           "   % \n" +
           "   @RELATION Mahout\n" +
-          "\n" +
+          '\n' +
           "   @ATTRIBUTE foo  NUMERIC\n" +
           "   @ATTRIBUTE bar   NUMERIC\n" +
           "   @ATTRIBUTE hockey  NUMERIC\n" +
           "   @ATTRIBUTE football   NUMERIC\n" +
           "   @ATTRIBUTE tennis   NUMERIC\n" +
           "  \n" +
-          "\n" +
-          "\n" +
+          '\n' +
+          '\n' +
           "   @DATA\n" +
           "   {1 23.1,2 3.23,3 1.2,4 0.2}\n" +
           "   {0 2.9}\n" +
@@ -220,20 +216,20 @@
           "   {1 2.2,2 2.94 0.2}\n" +
           "   {1 2.9,2 3.1}\n";
 
-  public static final String NON_NUMERIC_ARFF = "   % Comments\n" +
+  private static final String NON_NUMERIC_ARFF = "   % Comments\n" +
           "   % \n" +
           "   % Comments go here" +
           "   % \n" +
           "   @RELATION Mahout\n" +
-          "\n" +
+          '\n' +
           "   @ATTRIBUTE junk  NUMERIC\n" +
           "   @ATTRIBUTE foo  NUMERIC\n" +
           "   @ATTRIBUTE bar   {c,d}\n" +
           "   @ATTRIBUTE hockey  string\n" +
           "   @ATTRIBUTE football   date \"yyyy-MM-dd\"\n" +
           "  \n" +
-          "\n" +
-          "\n" +
+          '\n' +
+          '\n' +
           "   @DATA\n" +
           "   {2 c,3 gretzky,4 1973-10-23}\n" +
           "   {1 2.9,2 d,3 orr,4 1973-11-23}\n" +
@@ -246,20 +242,20 @@
           "   {0 2.2,2 d,3 messier,4 2008-11-23}\n" +
           "   {2 c,3 roy,4 1973-10-13}\n";
 
-  public static final String NON_NUMERIC_ARFF2 = "   % Comments\n" +
+  private static final String NON_NUMERIC_ARFF2 = "   % Comments\n" +
           "   % \n" +
           "   % Comments go here" +
           "   % \n" +
           "   @RELATION Mahout\n" +
-          "\n" +
+          '\n' +
           "   @ATTRIBUTE junk  NUMERIC\n" +
           "   @ATTRIBUTE foo  NUMERIC\n" +
           "   @ATTRIBUTE test   {f,z}\n" +
           "   @ATTRIBUTE hockey  string\n" +
           "   @ATTRIBUTE football   date \"yyyy-MM-dd\"\n" +
           "  \n" +
-          "\n" +
-          "\n" +
+          '\n' +
+          '\n' +
           "   @DATA\n" +
           "   {2 f,3 gretzky,4 1973-10-23}\n" +
           "   {1 2.9,2 z,3 orr,4 1973-11-23}\n" +

Modified: lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/lucene/LuceneIterableTest.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/lucene/LuceneIterableTest.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/lucene/LuceneIterableTest.java (original)
+++ lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/lucene/LuceneIterableTest.java Mon Aug 24 20:16:37 2009
@@ -1,4 +1,3 @@
-package org.apache.mahout.utils.vectors.lucene;
 /**
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
@@ -16,6 +15,8 @@
  * limitations under the License.
  */
 
+package org.apache.mahout.utils.vectors.lucene;
+
 import junit.framework.TestCase;
 import org.apache.lucene.store.RAMDirectory;
 import org.apache.lucene.index.IndexWriter;
@@ -30,12 +31,8 @@
 import org.apache.mahout.matrix.Vector;
 import org.apache.mahout.matrix.SparseVector;
 
-/**
- *
- *
- **/
 public class LuceneIterableTest extends TestCase {
-  protected RAMDirectory directory;
+  private RAMDirectory directory;
 
   private static final String [] DOCS = {
         "The quick red fox jumped over the lazy brown dogs.",
@@ -48,6 +45,7 @@
 
   @Override
   protected void setUp() throws Exception {
+    super.setUp();
     directory = new RAMDirectory();
     IndexWriter writer = new IndexWriter(directory, new StandardAnalyzer(Version.LUCENE_CURRENT), true, IndexWriter.MaxFieldLength.UNLIMITED);
     for (int i = 0; i < DOCS.length; i++){