You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by sr...@apache.org on 2011/10/15 16:08:39 UTC

svn commit: r1183642 [2/3] - in /mahout/trunk: core/src/main/java/org/apache/mahout/cf/taste/hadoop/ core/src/main/java/org/apache/mahout/cf/taste/impl/model/file/ core/src/main/java/org/apache/mahout/classifier/ core/src/main/java/org/apache/mahout/cl...

Modified: mahout/trunk/core/src/test/java/org/apache/mahout/graph/preprocessing/AdjacencyMatrixJobTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/graph/preprocessing/AdjacencyMatrixJobTest.java?rev=1183642&r1=1183641&r2=1183642&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/graph/preprocessing/AdjacencyMatrixJobTest.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/graph/preprocessing/AdjacencyMatrixJobTest.java Sat Oct 15 14:08:33 2011
@@ -32,22 +32,20 @@ import java.io.File;
 
 public class AdjacencyMatrixJobTest extends GraphTestCase {
 
-  File verticesFile;
-  File edgesFile;
-  File indexedVerticesFile;
-  File outputDir;
-  File tempDir;
-
-  int numVertices;
-  double stayingProbability;
-  Matrix expectedAdjacencyMatrix;
-
-  Configuration conf;
+  private File edgesFile;
+  private File indexedVerticesFile;
+  private File outputDir;
+  private File tempDir;
+  private int numVertices;
+  private double stayingProbability;
+  private Matrix expectedAdjacencyMatrix;
+  private Configuration conf;
 
+  @Override
   @Before
   public void setUp() throws Exception {
     super.setUp();
-    verticesFile = getTestTempFile("vertices.txt");
+    File verticesFile = getTestTempFile("vertices.txt");
     edgesFile = getTestTempFile("edges.seq");
     indexedVerticesFile = getTestTempFile("indexedVertices.seq");
     outputDir = getTestTempDir("output");

Modified: mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/LocalSSVDSolverDenseTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/LocalSSVDSolverDenseTest.java?rev=1183642&r1=1183641&r2=1183642&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/LocalSSVDSolverDenseTest.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/LocalSSVDSolverDenseTest.java Sat Oct 15 14:08:33 2011
@@ -20,8 +20,6 @@ package org.apache.mahout.math.hadoop.st
 import java.io.File;
 import java.io.IOException;
 
-import junit.framework.Assert;
-
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
@@ -165,8 +163,7 @@ public class LocalSSVDSolverDenseTest ex
     // used to generate surrogate input
 
     for (int i = 0; i < k; i++) {
-      Assert
-        .assertTrue(Math.abs((singularValues.getQuick(i) - stochasticSValues[i])
+      assertTrue(Math.abs((singularValues.getQuick(i) - stochasticSValues[i])
             / singularValues.getQuick(i)) <= s_precisionPct / 100);
     }
 

Modified: mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/LocalSSVDSolverSparseSequentialTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/LocalSSVDSolverSparseSequentialTest.java?rev=1183642&r1=1183641&r2=1183642&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/LocalSSVDSolverSparseSequentialTest.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/LocalSSVDSolverSparseSequentialTest.java Sat Oct 15 14:08:33 2011
@@ -25,7 +25,6 @@ import java.util.LinkedList;
 import java.util.Random;
 
 import com.google.common.io.Closeables;
-import junit.framework.Assert;
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
@@ -158,14 +157,11 @@ public class LocalSSVDSolverSparseSequen
     SingularValueDecomposition svd2 =
       new SingularValueDecomposition(new DenseMatrix(a));
 
-    a = null;
-
     double[] svalues2 = svd2.getSingularValues();
     dumpSv(svalues2);
 
     for (int i = 0; i < k + p; i++) {
-      Assert
-        .assertTrue(Math.abs(svalues2[i] - stochasticSValues[i]) <= s_epsilon);
+      assertTrue(Math.abs(svalues2[i] - stochasticSValues[i]) <= s_epsilon);
     }
 
     double[][] mQ =

Modified: mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/SSVDPrototypeTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/SSVDPrototypeTest.java?rev=1183642&r1=1183641&r2=1183642&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/SSVDPrototypeTest.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/SSVDPrototypeTest.java Sat Oct 15 14:08:33 2011
@@ -19,8 +19,6 @@ package org.apache.mahout.math.hadoop.st
 
 import java.util.Random;
 
-import junit.framework.Assert;
-
 import org.apache.mahout.common.MahoutTestCase;
 import org.apache.mahout.common.RandomUtils;
 import org.apache.mahout.math.DenseMatrix;
@@ -92,17 +90,16 @@ public class SSVDPrototypeTest extends M
       if (Math.abs(1 - norm) < epsilon) {
         rank++;
       } else {
-        Assert.assertTrue(Math.abs(norm) < epsilon);
+        assertTrue(Math.abs(norm) < epsilon);
       }
 
       for (int j = 0; j <= i; j++) {
         Vector e_j = mtx.viewColumn(j);
         double dot = ei.dot(e_j);
-        Assert
-            .assertTrue(Math.abs((i == j && rank > j ? 1 : 0) - dot) < epsilon);
+        assertTrue(Math.abs((i == j && rank > j ? 1 : 0) - dot) < epsilon);
       }
     }
-    Assert.assertTrue((!insufficientRank && rank == n) || (insufficientRank && rank < n));
+    assertTrue((!insufficientRank && rank == n) || (insufficientRank && rank < n));
 
   }
 

Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/bookcrossing/BookCrossingDataModel.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/bookcrossing/BookCrossingDataModel.java?rev=1183642&r1=1183641&r2=1183642&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/bookcrossing/BookCrossingDataModel.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/bookcrossing/BookCrossingDataModel.java Sat Oct 15 14:08:33 2011
@@ -68,7 +68,7 @@ public final class BookCrossingDataModel
           continue;
         }
         // Delete replace anything that isn't numeric, or a semicolon delimiter. Make comma the delimiter.
-        String convertedLine = BookCrossingDataModel.NON_DIGIT_SEMICOLON_PATTERN.matcher(line)
+        String convertedLine = NON_DIGIT_SEMICOLON_PATTERN.matcher(line)
             .replaceAll("").replace(';', ',');
         // If this means we deleted an entire ID -- few cases like that -- skip the line
         if (convertedLine.contains(",,")) {

Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/email/EmailUtility.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/email/EmailUtility.java?rev=1183642&r1=1183641&r2=1183642&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/email/EmailUtility.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/email/EmailUtility.java Sat Oct 15 14:08:33 2011
@@ -1,5 +1,21 @@
-package org.apache.mahout.cf.taste.example.email;
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
 
+package org.apache.mahout.cf.taste.example.email;
 
 import com.google.common.base.Preconditions;
 import org.apache.hadoop.conf.Configuration;
@@ -13,38 +29,34 @@ import org.apache.mahout.math.map.OpenOb
 
 import java.io.IOException;
 import java.net.URI;
+import java.util.regex.Pattern;
 
-/**
- *
- *
- **/
 public final class EmailUtility {
+
   public static final String SEPARATOR = "separator";
   public static final String MSG_IDS_PREFIX = "msgIdsPrefix";
   public static final String FROM_PREFIX = "fromPrefix";
   public static final String MSG_ID_DIMENSION = "msgIdDim";
   public static final String FROM_INDEX = "fromIdx";
   public static final String REFS_INDEX = "refsIdx";
+  private static final String[] EMPTY = new String[0];
+  private static final Pattern ADDRESS_CLEANUP = Pattern.compile("mailto:|<|>|\\[|\\]|\\=20");
+  private static final Pattern ANGLE_BRACES = Pattern.compile("<|>");
+  private static final Pattern SPACE_OR_CLOSE_ANGLE = Pattern.compile(">|\\s+");
 
   private EmailUtility() {
-
   }
 
   /**
    * Strip off some spurious characters that make it harder to dedup
-   *
-   * @param address
-   * @return
    */
-  public static String cleanUpEmailAddress(String address) {
+  public static String cleanUpEmailAddress(CharSequence address) {
     //do some cleanup to normalize some things, like: Key: karthik ananth <ka...@gmail.com>: Value: 178
     //Key: karthik ananth [mailto:karthik.jcecs@gmail.com]=20: Value: 179
     //TODO: is there more to clean up here?
-    address = address.replaceAll("mailto:|<|>|\\[|\\]|\\=20", "");
-    return address;
+    return ADDRESS_CLEANUP.matcher(address).replaceAll("");
   }
 
-
   public static void loadDictionaries(Configuration conf, String fromPrefix,
                                       OpenObjectIntHashMap<String> fromDictionary,
                                       String msgIdPrefix,
@@ -53,8 +65,7 @@ public final class EmailUtility {
     URI[] localFiles = DistributedCache.getCacheFiles(conf);
     Preconditions.checkArgument(localFiles != null,
             "missing paths from the DistributedCache");
-    for (int i = 0; i < localFiles.length; i++) {
-      URI localFile = localFiles[i];
+    for (URI localFile : localFiles) {
       Path dictionaryFile = new Path(localFile.getPath());
       // key is word value is id
 
@@ -66,7 +77,7 @@ public final class EmailUtility {
       }
       if (dictionary != null) {
         for (Pair<Writable, IntWritable> record
-                : new SequenceFileIterable<Writable, IntWritable>(dictionaryFile, true, conf)) {
+            : new SequenceFileIterable<Writable, IntWritable>(dictionaryFile, true, conf)) {
           dictionary.put(record.getFirst().toString(), record.getSecond().get());
         }
       }
@@ -74,14 +85,12 @@ public final class EmailUtility {
 
   }
 
-  private static final String [] EMPTY = new String[0];
-
-  public static String[] parseReferences(String rawRefs) {
-    String[] splits = null;
+  public static String[] parseReferences(CharSequence rawRefs) {
+    String[] splits;
     if (rawRefs != null && rawRefs.length() > 0) {
-      splits = rawRefs.split(">|\\s+");
+      splits = SPACE_OR_CLOSE_ANGLE.split(rawRefs);
       for (int i = 0; i < splits.length; i++) {
-        splits[i] = splits[i].replaceAll("<|>", "");
+        splits[i] = ANGLE_BRACES.matcher(splits[i]).replaceAll("");
       }
     } else {
       splits = EMPTY;

Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/email/FromEmailToDictionaryMapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/email/FromEmailToDictionaryMapper.java?rev=1183642&r1=1183641&r2=1183642&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/email/FromEmailToDictionaryMapper.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/email/FromEmailToDictionaryMapper.java Sat Oct 15 14:08:33 2011
@@ -1,24 +1,38 @@
-package org.apache.mahout.cf.taste.example.email;
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
 
+package org.apache.mahout.cf.taste.example.email;
 
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.mapreduce.Mapper;
 import org.apache.mahout.math.VarIntWritable;
-import org.apache.mahout.math.VarLongWritable;
 
 import java.io.IOException;
 
 /**
  *  Assumes the input is in the format created by {@link org.apache.mahout.text.SequenceFilesFromMailArchives}
- *
- **/
-public class FromEmailToDictionaryMapper extends
-        Mapper<Text, Text, Text, VarIntWritable> {
-  private String separator = "\n";
+ */
+public final class FromEmailToDictionaryMapper extends Mapper<Text, Text, Text, VarIntWritable> {
 
+  private String separator;
 
   @Override
   protected void setup(Context context) throws IOException, InterruptedException {
+    super.setup(context);
     separator = context.getConfiguration().get(EmailUtility.SEPARATOR);
   }
 

Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/email/MailToDictionaryReducer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/email/MailToDictionaryReducer.java?rev=1183642&r1=1183641&r2=1183642&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/email/MailToDictionaryReducer.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/email/MailToDictionaryReducer.java Sat Oct 15 14:08:33 2011
@@ -1,10 +1,25 @@
-package org.apache.mahout.cf.taste.example.email;
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
 
+package org.apache.mahout.cf.taste.example.email;
 
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.mapreduce.Reducer;
 import org.apache.mahout.math.VarIntWritable;
-import org.apache.mahout.math.VarLongWritable;
 
 import java.io.IOException;
 
@@ -13,13 +28,12 @@ import java.io.IOException;
  * Value: the count
  * Out Key: the string id
  * Out Value: the sum of the counts
- *
- **/
-public class MailToDictionaryReducer extends
-        Reducer<Text, VarIntWritable, Text, VarIntWritable> {
+ */
+public final class MailToDictionaryReducer extends Reducer<Text, VarIntWritable, Text, VarIntWritable> {
 
   @Override
-  protected void reduce(Text key, Iterable<VarIntWritable> values, Context context) throws IOException, InterruptedException {
+  protected void reduce(Text key, Iterable<VarIntWritable> values, Context context)
+    throws IOException, InterruptedException {
     int sum = 0;
     for (VarIntWritable value : values) {
       sum += value.get();

Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/email/MailToPrefsDriver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/email/MailToPrefsDriver.java?rev=1183642&r1=1183641&r2=1183642&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/email/MailToPrefsDriver.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/email/MailToPrefsDriver.java Sat Oct 15 14:08:33 2011
@@ -1,4 +1,3 @@
-package org.apache.mahout.cf.taste.example.email;
 /**
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
@@ -16,6 +15,7 @@ package org.apache.mahout.cf.taste.examp
  * limitations under the License.
  */
 
+package org.apache.mahout.cf.taste.example.email;
 
 import com.google.common.collect.Lists;
 import com.google.common.io.Closeables;
@@ -64,20 +64,19 @@ import java.util.concurrent.atomic.Atomi
  * <p/>
  * It also outputs a side table mapping the row ids to their original and the message ids to the message thread id
  */
-public class MailToPrefsDriver extends AbstractJob {
+public final class MailToPrefsDriver extends AbstractJob {
+
   private static final Logger log = LoggerFactory.getLogger(MailToPrefsDriver.class);
 
   private static final String OUTPUT_FILES_PATTERN = "part-*";
   private static final int DICTIONARY_BYTE_OVERHEAD = 4;
 
-
   public static void main(String[] args) throws Exception {
     ToolRunner.run(new Configuration(), new MailToPrefsDriver(), args);
   }
 
   @Override
   public int run(String[] args) throws Exception {
-    int result = 0;
     addInputOption();
     addOutputOption();
     addOption(DefaultOptionCreator.overwriteOption().create());
@@ -99,9 +98,8 @@ public class MailToPrefsDriver extends A
 
     AtomicInteger currentPhase = new AtomicInteger();
     int[] msgDim = new int[1];
-    int[] fromDim = new int[1];
     //TODO: mod this to not do so many passes over the data.  Dictionary creation could probably be a chain mapper
-    List<Path> msgIdChunks = null, fromChunks = null;
+    List<Path> msgIdChunks = null;
     boolean overwrite = hasOption(DefaultOptionCreator.OVERWRITE_OPTION);
     // create the dictionary between message ids and longs
     if (shouldRunNextPhase(parsedArgs, currentPhase)) {
@@ -126,6 +124,7 @@ public class MailToPrefsDriver extends A
       msgIdChunks = createDictionaryChunks(msgIdsPath, output, "msgIds-dictionary-", createMsgIdDictionary.getConfiguration(), chunkSize, msgDim);
     }
     //create the dictionary between from email addresses and longs
+    List<Path> fromChunks = null;
     if (shouldRunNextPhase(parsedArgs, currentPhase)) {
       Path fromIdsPath = new Path(output, "fromIds");
       if (overwrite) {
@@ -145,6 +144,7 @@ public class MailToPrefsDriver extends A
       createFromIdDictionary.getConfiguration().set(EmailUtility.SEPARATOR, separator);
       createFromIdDictionary.waitForCompletion(true);
       //write out the dictionary at the top level
+      int[] fromDim = new int[1];
       fromChunks = createDictionaryChunks(fromIdsPath, output, "fromIds-dictionary-", createFromIdDictionary.getConfiguration(), chunkSize, fromDim);
     }
     //OK, we have our dictionaries, let's output the real thing we need: <from_id -> <msgId, msgId, msgId, ...>>
@@ -152,7 +152,7 @@ public class MailToPrefsDriver extends A
       //Job map
       //may be a way to do this so that we can load the from ids in memory, if they are small enough so that we don't need the double loop
       log.info("Creating recommendation matrix");
-      int i = 0, j = 0;
+      int i = 0;
       Path vecPath = new Path(output, "recInput");
       if (overwrite) {
         HadoopUtil.delete(conf, vecPath);
@@ -164,9 +164,10 @@ public class MailToPrefsDriver extends A
       conf.set(EmailUtility.FROM_INDEX, parsedArgs.get("--from"));
       conf.set(EmailUtility.REFS_INDEX, parsedArgs.get("--refs"));
       conf.set(EmailUtility.SEPARATOR, separator);
+      int j = 0;
       for (Path fromChunk : fromChunks) {
         for (Path idChunk : msgIdChunks) {
-          Path out = new Path(vecPath, "tmp-" + i + "-" + j);
+          Path out = new Path(vecPath, "tmp-" + i + '-' + j);
           DistributedCache.setCacheFiles(new URI[]{fromChunk.toUri(), idChunk.toUri()}, conf);
           Job createRecMatrix = prepareJob(input, out, SequenceFileInputFormat.class,
                   MailToRecMapper.class, NullWritable.class, Text.class,
@@ -175,10 +176,10 @@ public class MailToPrefsDriver extends A
           createRecMatrix.waitForCompletion(true);
           //copy the results up a level
           //HadoopUtil.copyMergeSeqFiles(out.getFileSystem(conf), out, vecPath.getFileSystem(conf), outPath, true, conf, "");
-          FileStatus fs[] = HadoopUtil.getFileStatus(new Path(out, "*"), PathType.GLOB, PathFilters.partFilter(), null, conf);
+          FileStatus[] fs = HadoopUtil.getFileStatus(new Path(out, "*"), PathType.GLOB, PathFilters.partFilter(), null, conf);
           for (int k = 0; k < fs.length; k++) {
             FileStatus f = fs[k];
-            Path outPath = new Path(vecPath, "chunk-" + i + "-" + j + "-" + k);
+            Path outPath = new Path(vecPath, "chunk-" + i + '-' + j + '-' + k);
             FileUtil.copy(f.getPath().getFileSystem(conf), f.getPath(), outPath.getFileSystem(conf), outPath, true, overwrite, conf);
           }
           HadoopUtil.delete(conf, out);
@@ -195,7 +196,7 @@ public class MailToPrefsDriver extends A
       //HadoopUtil.copyMergeSeqFiles(vecPath.getFileSystem(conf), vecPath, mergePath.getFileSystem(conf), mergePath, false, conf, "\n");
     }
 
-    return result;
+    return 0;
   }
 
   private static List<Path> createDictionaryChunks(Path inputPath,

Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/email/MailToRecMapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/email/MailToRecMapper.java?rev=1183642&r1=1183641&r2=1183642&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/email/MailToRecMapper.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/email/MailToRecMapper.java Sat Oct 15 14:08:33 2011
@@ -1,4 +1,3 @@
-package org.apache.mahout.cf.taste.example.email;
 /**
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
@@ -16,31 +15,28 @@ package org.apache.mahout.cf.taste.examp
  * limitations under the License.
  */
 
+package org.apache.mahout.cf.taste.example.email;
 
 import org.apache.commons.lang.StringUtils;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.io.NullWritable;
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.mapreduce.Mapper;
-import org.apache.mahout.math.VarIntWritable;
 import org.apache.mahout.math.map.OpenObjectIntHashMap;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 import java.io.IOException;
 
-/**
- *
- *
- **/
-public class MailToRecMapper extends
-        Mapper<Text, Text, NullWritable, Text> {
-  private transient static Logger log = LoggerFactory.getLogger(MailToRecMapper.class);
-  private OpenObjectIntHashMap<String> fromDictionary = new OpenObjectIntHashMap<String>();
-  private OpenObjectIntHashMap<String> msgIdDictionary = new OpenObjectIntHashMap<String>();
+public final class MailToRecMapper extends Mapper<Text, Text, NullWritable, Text> {
+
+  private static final Logger log = LoggerFactory.getLogger(MailToRecMapper.class);
+
+  private final OpenObjectIntHashMap<String> fromDictionary = new OpenObjectIntHashMap<String>();
+  private final OpenObjectIntHashMap<String> msgIdDictionary = new OpenObjectIntHashMap<String>();
   private String separator = "\n";
-  protected int fromIdx;
-  protected int refsIdx;
+  private int fromIdx;
+  private int refsIdx;
 
   public enum Counters {
     REFERENCE, ORIGINAL
@@ -48,6 +44,7 @@ public class MailToRecMapper extends
 
   @Override
   protected void setup(Context context) throws IOException, InterruptedException {
+    super.setup(context);
     Configuration conf = context.getConfiguration();
     String fromPrefix = conf.get(EmailUtility.FROM_PREFIX);
     String msgPrefix = conf.get(EmailUtility.MSG_IDS_PREFIX);
@@ -61,7 +58,6 @@ public class MailToRecMapper extends
   @Override
   protected void map(Text key, Text value, Context context) throws IOException, InterruptedException {
 
-    String msgId = null;
     int msgIdKey = Integer.MIN_VALUE;
 
 
@@ -87,9 +83,9 @@ public class MailToRecMapper extends
     if (msgIdKey == Integer.MIN_VALUE) {//we don't have any references, so use the msg id
       //get the msg id and the from and output the associated ids
       String keyStr = key.toString();
-      int idx = keyStr.lastIndexOf("/");
+      int idx = keyStr.lastIndexOf('/');
       if (idx != -1) {
-        msgId = keyStr.substring(idx + 1);
+        String msgId = keyStr.substring(idx + 1);
         msgIdKey = msgIdDictionary.get(msgId);
         context.getCounter(Counters.ORIGINAL).increment(1);
       }

Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/email/MsgIdToDictionaryMapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/email/MsgIdToDictionaryMapper.java?rev=1183642&r1=1183641&r2=1183642&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/email/MsgIdToDictionaryMapper.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/email/MsgIdToDictionaryMapper.java Sat Oct 15 14:08:33 2011
@@ -1,5 +1,21 @@
-package org.apache.mahout.cf.taste.example.email;
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
 
+package org.apache.mahout.cf.taste.example.email;
 
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.mapreduce.Mapper;
@@ -10,8 +26,8 @@ import java.io.IOException;
 /**
  * Assumes the input is in the format created by {@link org.apache.mahout.text.SequenceFilesFromMailArchives}
  */
-public class MsgIdToDictionaryMapper extends
-        Mapper<Text, Text, Text, VarIntWritable> {
+public final class MsgIdToDictionaryMapper extends Mapper<Text, Text, Text, VarIntWritable> {
+
   public enum Counters {
     NO_MESSAGE_ID
   }
@@ -20,13 +36,12 @@ public class MsgIdToDictionaryMapper ext
   protected void map(Text key, Text value, Context context) throws IOException, InterruptedException {
     //message id is in the key: /201008/AANLkTikvVnhNH+Y5AGEwqd2=u0CFv2mCm0ce6E6oBnj1@mail.gmail.com
     String keyStr = key.toString();
-    int idx = keyStr.lastIndexOf("/");
-    String msgId = null;
-    if (idx != -1) {
-      msgId = keyStr.substring(idx + 1);
-      context.write(new Text(msgId), new VarIntWritable(1));
-    } else {
+    int idx = keyStr.lastIndexOf('/');
+    if (idx == -1) {
       context.getCounter(Counters.NO_MESSAGE_ID).increment(1);
+    } else {
+      String msgId = keyStr.substring(idx + 1);
+      context.write(new Text(msgId), new VarIntWritable(1));
     }
   }
 }

Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/WikipediaDatasetCreatorMapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/WikipediaDatasetCreatorMapper.java?rev=1183642&r1=1183641&r2=1183642&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/WikipediaDatasetCreatorMapper.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/WikipediaDatasetCreatorMapper.java Sat Oct 15 14:08:33 2011
@@ -65,8 +65,8 @@ public class WikipediaDatasetCreatorMapp
     String catMatch = findMatchingCategory(document);
     if (!"Unknown".equals(catMatch)) {
       StringBuilder contents = new StringBuilder(1000);
-      document = StringEscapeUtils.unescapeHtml(WikipediaDatasetCreatorMapper.CLOSE_TEXT_TAG_PATTERN.matcher(
-          WikipediaDatasetCreatorMapper.OPEN_TEXT_TAG_PATTERN.matcher(document).replaceFirst("")).replaceAll(""));
+      document = StringEscapeUtils.unescapeHtml(CLOSE_TEXT_TAG_PATTERN.matcher(
+          OPEN_TEXT_TAG_PATTERN.matcher(document).replaceFirst("")).replaceAll(""));
       TokenStream stream = analyzer.reusableTokenStream(catMatch, new StringReader(document));
       CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class);
       stream.reset();
@@ -74,7 +74,7 @@ public class WikipediaDatasetCreatorMapp
         contents.append(termAtt.buffer(), 0, termAtt.length()).append(' ');
       }
       context.write(
-          new Text(WikipediaDatasetCreatorMapper.SPACE_NON_ALPHA_PATTERN.matcher(catMatch).replaceAll("_")),
+          new Text(SPACE_NON_ALPHA_PATTERN.matcher(catMatch).replaceAll("_")),
           new Text(contents.toString()));
     }
   }
@@ -132,11 +132,12 @@ public class WikipediaDatasetCreatorMapp
       // categories.add(category.toLowerCase());
       if (exactMatchOnly && inputCategories.contains(category)) {
         return category;
-      } else if (!exactMatchOnly) {
+      }
+      if (!exactMatchOnly) {
         for (int i = 0; i < inputCategories.size(); i++) {
           String inputCategory = inputCategories.get(i);
           Pattern inputCategoryPattern = inputCategoryPatterns.get(i);
-          if (inputCategoryPattern.matcher(category).matches()) { // inexact match with word boundary. 
+          if (inputCategoryPattern.matcher(category).matches()) { // inexact match with word boundary.
             return inputCategory;
           }
         }

Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/email/PrepEmailMapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/email/PrepEmailMapper.java?rev=1183642&r1=1183641&r2=1183642&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/email/PrepEmailMapper.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/email/PrepEmailMapper.java Sat Oct 15 14:08:33 2011
@@ -1,4 +1,3 @@
-package org.apache.mahout.classifier.email;
 /**
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
@@ -16,18 +15,26 @@ package org.apache.mahout.classifier.ema
  * limitations under the License.
  */
 
+package org.apache.mahout.classifier.email;
 
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.io.WritableComparable;
 import org.apache.hadoop.mapreduce.Mapper;
 import org.apache.mahout.math.VectorWritable;
+import org.apache.mahout.utils.email.MailProcessor;
 
 import java.io.IOException;
+import java.util.Locale;
+import java.util.regex.Pattern;
 
 /**
- * Convert the labels created by the {@link org.apache.mahout.utils.email.MailProcessor} to one consumable by the classifiers
+ * Convert the labels created by the {@link MailProcessor} to one consumable by the classifiers
  */
 public class PrepEmailMapper extends Mapper<WritableComparable<?>, VectorWritable, Text, VectorWritable> {
+
+  private static final Pattern DASH_DOT = Pattern.compile("-|\\.");
+  private static final Pattern SLASH = Pattern.compile("\\/");
+
   private boolean useListName = false;//if true, use the project name and the list name in label creation
   @Override
   protected void setup(Context context) throws IOException, InterruptedException {
@@ -35,18 +42,24 @@ public class PrepEmailMapper extends Map
   }
 
   @Override
-  protected void map(WritableComparable<?> key, VectorWritable value, Context context) throws IOException, InterruptedException {
+  protected void map(WritableComparable<?> key, VectorWritable value, Context context)
+    throws IOException, InterruptedException {
     String input = key.toString();
     ///Example: /cocoon.apache.org/dev/200307.gz/001401c3414f$8394e160$1e01a8c0@WRPO
-    String[] splits = input.split("\\/");
+    String[] splits = SLASH.split(input);
     //we need the first two splits;
     if (splits.length >= 3) {
-      StringBuilder bldr = new StringBuilder(splits[1].replaceAll("-|\\.", "_").toLowerCase());
-      if (useListName == true) {
-        bldr.append("_").append(splits[2].replaceAll("-|\\.", "_").toLowerCase());
+      StringBuilder bldr = new StringBuilder();
+      bldr.append(escape(splits[1]));
+      if (useListName) {
+        bldr.append('_').append(escape(splits[2]));
       }
       context.write(new Text(bldr.toString()), value);
     }
 
   }
+  
+  private static String escape(CharSequence value) {
+    return DASH_DOT.matcher(value).replaceAll("_").toLowerCase(Locale.ENGLISH);
+  }
 }

Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/email/PrepEmailReducer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/email/PrepEmailReducer.java?rev=1183642&r1=1183641&r2=1183642&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/email/PrepEmailReducer.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/email/PrepEmailReducer.java Sat Oct 15 14:08:33 2011
@@ -1,4 +1,3 @@
-package org.apache.mahout.classifier.email;
 /**
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
@@ -16,6 +15,7 @@ package org.apache.mahout.classifier.ema
  * limitations under the License.
  */
 
+package org.apache.mahout.classifier.email;
 
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.mapreduce.Reducer;
@@ -24,19 +24,18 @@ import org.apache.mahout.math.VectorWrit
 import java.io.IOException;
 import java.util.Iterator;
 
-/**
- *
- *
- **/
 public class PrepEmailReducer extends Reducer<Text, VectorWritable, Text, VectorWritable>{
-  long maxItemsPerLabel = 10000;
+
+  private long maxItemsPerLabel = 10000;
+
   @Override
   protected void setup(Context context) throws IOException, InterruptedException {
     maxItemsPerLabel = Long.parseLong(context.getConfiguration().get(PrepEmailVectorsDriver.ITEMS_PER_CLASS));
   }
 
   @Override
-  protected void reduce(Text key, Iterable<VectorWritable> values, Context context) throws IOException, InterruptedException {
+  protected void reduce(Text key, Iterable<VectorWritable> values, Context context)
+    throws IOException, InterruptedException {
     //TODO: support randomization?  Likely not needed due to the SplitInput utility which does random selection
     long i = 0;
     Iterator<VectorWritable> iterator = values.iterator();

Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/PosTagger.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/PosTagger.java?rev=1183642&r1=1183641&r2=1183642&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/PosTagger.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/PosTagger.java Sat Oct 15 14:08:33 2011
@@ -211,7 +211,7 @@ public final class PosTagger {
   }
 
   private static void testModel(String testingURL) throws IOException {
-    log.info("Reading and parsing test data file from URL:" + testingURL);
+    log.info("Reading and parsing test data file from URL: {}", testingURL);
     long start = System.currentTimeMillis();
     readFromURL(testingURL, false);
     long end = System.currentTimeMillis();

Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/AdaptiveLogisticModelParameters.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/AdaptiveLogisticModelParameters.java?rev=1183642&r1=1183641&r2=1183642&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/AdaptiveLogisticModelParameters.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/AdaptiveLogisticModelParameters.java Sat Oct 15 14:08:33 2011
@@ -29,6 +29,7 @@ import java.io.OutputStream;
 import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.List;
+import java.util.Locale;
 import java.util.Map;
 
 import com.google.common.io.Closeables;
@@ -61,8 +62,8 @@ public class AdaptiveLogisticModelParame
 
   public void checkParameters() {
     if (prior != null) {
-      if ("TP".equals(prior.toUpperCase().trim()) ||
-          "EBP".equals(prior.toUpperCase().trim())) {
+      if ("TP".equals(prior.toUpperCase(Locale.ENGLISH).trim()) ||
+          "EBP".equals(prior.toUpperCase(Locale.ENGLISH).trim())) {
         if (Double.isNaN(priorOption)) {
           throw new IllegalArgumentException("You must specify a double value for TPrior and ElasticBandPrior.");
         }
@@ -74,19 +75,19 @@ public class AdaptiveLogisticModelParame
     if (cmd == null) {
       return null;
     }
-    if ("L1".equals(cmd.toUpperCase().trim())) {
+    if ("L1".equals(cmd.toUpperCase(Locale.ENGLISH).trim())) {
       return new L1();
     }
-    if ("L2".equals(cmd.toUpperCase().trim())) {
+    if ("L2".equals(cmd.toUpperCase(Locale.ENGLISH).trim())) {
       return new L2();
     }
-    if ("UP".equals(cmd.toUpperCase().trim())) {
+    if ("UP".equals(cmd.toUpperCase(Locale.ENGLISH).trim())) {
       return new UniformPrior();
     }
-    if ("TP".equals(cmd.toUpperCase().trim())) {
+    if ("TP".equals(cmd.toUpperCase(Locale.ENGLISH).trim())) {
       return new TPrior(priorOption);
     }
-    if ("EBP".equals(cmd.toUpperCase().trim())) {
+    if ("EBP".equals(cmd.toUpperCase(Locale.ENGLISH).trim())) {
       return new ElasticBandPrior(priorOption);
     }
 
@@ -97,10 +98,10 @@ public class AdaptiveLogisticModelParame
     if (cmd == null) {
       return null;
     }
-    if ("GLOBAL".equals(cmd.toUpperCase().trim())) {
+    if ("GLOBAL".equals(cmd.toUpperCase(Locale.ENGLISH).trim())) {
       return new GlobalOnlineAuc();
     }
-    if ("GROUPED".equals(cmd.toUpperCase().trim())) {
+    if ("GROUPED".equals(cmd.toUpperCase(Locale.ENGLISH).trim())) {
       return new GroupedOnlineAuc();
     }
     return null;

Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayClustering.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayClustering.java?rev=1183642&r1=1183641&r2=1183642&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayClustering.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayClustering.java Sat Oct 15 14:08:33 2011
@@ -123,7 +123,7 @@ public class DisplayClustering extends F
     int cx = CLUSTERS.size() - 1;
     for (List<Cluster> clusters : CLUSTERS) {
       g2.setStroke(new BasicStroke(cx == 0 ? 3 : 1));
-      g2.setColor(COLORS[Math.min(DisplayClustering.COLORS.length - 1, cx--)]);
+      g2.setColor(COLORS[Math.min(COLORS.length - 1, cx--)]);
       for (Cluster cluster : clusters) {
         plotEllipse(g2, cluster.getCenter(), cluster.getRadius().times(3));
       }

Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/kmeans/Job.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/kmeans/Job.java?rev=1183642&r1=1183641&r2=1183642&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/kmeans/Job.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/kmeans/Job.java Sat Oct 15 14:08:33 2011
@@ -57,8 +57,7 @@ public final class Job extends AbstractJ
       Path output = new Path("output");
       Configuration conf = new Configuration();
       HadoopUtil.delete(conf, output);
-      new Job().run(conf, new Path("testdata"), output,
-          new EuclideanDistanceMeasure(), 6, 0.5, 10);
+      run(conf, new Path("testdata"), output, new EuclideanDistanceMeasure(), 6, 0.5, 10);
     }
   }
 

Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/df/mapreduce/BuildForest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/df/mapreduce/BuildForest.java?rev=1183642&r1=1183641&r2=1183642&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/df/mapreduce/BuildForest.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/df/mapreduce/BuildForest.java Sat Oct 15 14:08:33 2011
@@ -186,7 +186,7 @@ public class BuildForest extends Configu
 
     // store the decision forest in the output path
     Path forestPath = new Path(outputPath, "forest.seq");
-    log.info("Storing the forest in: " + forestPath);
+    log.info("Storing the forest in: {}", forestPath);
     DFUtils.storeWritable(getConf(), forestPath, forest);
   }
   

Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/travellingsalesman/EuropeanDistanceLookup.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/travellingsalesman/EuropeanDistanceLookup.java?rev=1183642&r1=1183641&r2=1183642&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/travellingsalesman/EuropeanDistanceLookup.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/travellingsalesman/EuropeanDistanceLookup.java Sat Oct 15 14:08:33 2011
@@ -52,7 +52,7 @@ public final class EuropeanDistanceLooku
     amsterdam.put("Rome", 1304);
     amsterdam.put("Stockholm", 1132);
     amsterdam.put("Vienna", 938);
-    EuropeanDistanceLookup.DISTANCES.put("Amsterdam", amsterdam);
+    DISTANCES.put("Amsterdam", amsterdam);
     
     Map<String,Integer> athens = Maps.newHashMapWithExpectedSize(20);
     athens.put("Amsterdam", 2162);
@@ -70,7 +70,7 @@ public final class EuropeanDistanceLooku
     athens.put("Rome", 1040);
     athens.put("Stockholm", 2410);
     athens.put("Vienna", 1280);
-    EuropeanDistanceLookup.DISTANCES.put("Athens", athens);
+    DISTANCES.put("Athens", athens);
     
     Map<String,Integer> berlin = Maps.newHashMapWithExpectedSize(20);
     berlin.put("Amsterdam", 576);
@@ -88,7 +88,7 @@ public final class EuropeanDistanceLooku
     berlin.put("Rome", 1185);
     berlin.put("Stockholm", 818);
     berlin.put("Vienna", 525);
-    EuropeanDistanceLookup.DISTANCES.put("Berlin", berlin);
+    DISTANCES.put("Berlin", berlin);
     
     Map<String,Integer> brussels = Maps.newHashMapWithExpectedSize(20);
     brussels.put("Amsterdam", 171);
@@ -106,7 +106,7 @@ public final class EuropeanDistanceLooku
     brussels.put("Rome", 1182);
     brussels.put("Stockholm", 1284);
     brussels.put("Vienna", 917);
-    EuropeanDistanceLookup.DISTANCES.put("Brussels", brussels);
+    DISTANCES.put("Brussels", brussels);
     
     Map<String,Integer> copenhagen = Maps.newHashMapWithExpectedSize(20);
     copenhagen.put("Amsterdam", 622);
@@ -124,7 +124,7 @@ public final class EuropeanDistanceLooku
     copenhagen.put("Rome", 1540);
     copenhagen.put("Stockholm", 526);
     copenhagen.put("Vienna", 876);
-    EuropeanDistanceLookup.DISTANCES.put("Copenhagen", copenhagen);
+    DISTANCES.put("Copenhagen", copenhagen);
     
     Map<String,Integer> dublin = Maps.newHashMapWithExpectedSize(20);
     dublin.put("Amsterdam", 757);
@@ -142,7 +142,7 @@ public final class EuropeanDistanceLooku
     dublin.put("Rome", 1903);
     dublin.put("Stockholm", 1625);
     dublin.put("Vienna", 1687);
-    EuropeanDistanceLookup.DISTANCES.put("Dublin", dublin);
+    DISTANCES.put("Dublin", dublin);
     
     Map<String,Integer> helsinki = Maps.newHashMapWithExpectedSize(20);
     helsinki.put("Amsterdam", 1506);
@@ -160,7 +160,7 @@ public final class EuropeanDistanceLooku
     helsinki.put("Rome", 2202);
     helsinki.put("Stockholm", 396);
     helsinki.put("Vienna", 1439);
-    EuropeanDistanceLookup.DISTANCES.put("Helsinki", helsinki);
+    DISTANCES.put("Helsinki", helsinki);
     
     Map<String,Integer> lisbon = Maps.newHashMapWithExpectedSize(20);
     lisbon.put("Amsterdam", 1861);
@@ -178,7 +178,7 @@ public final class EuropeanDistanceLooku
     lisbon.put("Rome", 1873);
     lisbon.put("Stockholm", 2993);
     lisbon.put("Vienna", 2300);
-    EuropeanDistanceLookup.DISTANCES.put("Lisbon", lisbon);
+    DISTANCES.put("Lisbon", lisbon);
     
     Map<String,Integer> london = Maps.newHashMapWithExpectedSize(20);
     london.put("Amsterdam", 356);
@@ -196,7 +196,7 @@ public final class EuropeanDistanceLooku
     london.put("Rome", 1444);
     london.put("Stockholm", 1436);
     london.put("Vienna", 1237);
-    EuropeanDistanceLookup.DISTANCES.put("London", london);
+    DISTANCES.put("London", london);
     
     Map<String,Integer> luxembourg = Maps.newHashMapWithExpectedSize(20);
     luxembourg.put("Amsterdam", 318);
@@ -214,7 +214,7 @@ public final class EuropeanDistanceLooku
     luxembourg.put("Rome", 995);
     luxembourg.put("Stockholm", 1325);
     luxembourg.put("Vienna", 761);
-    EuropeanDistanceLookup.DISTANCES.put("Luxembourg", luxembourg);
+    DISTANCES.put("Luxembourg", luxembourg);
     
     Map<String,Integer> madrid = Maps.newHashMapWithExpectedSize(20);
     madrid.put("Amsterdam", 1477);
@@ -232,7 +232,7 @@ public final class EuropeanDistanceLooku
     madrid.put("Rome", 1377);
     madrid.put("Stockholm", 2596);
     madrid.put("Vienna", 1812);
-    EuropeanDistanceLookup.DISTANCES.put("Madrid", madrid);
+    DISTANCES.put("Madrid", madrid);
     
     Map<String,Integer> paris = Maps.newHashMapWithExpectedSize(20);
     paris.put("Amsterdam", 429);
@@ -250,7 +250,7 @@ public final class EuropeanDistanceLooku
     paris.put("Rome", 1117);
     paris.put("Stockholm", 1549);
     paris.put("Vienna", 1037);
-    EuropeanDistanceLookup.DISTANCES.put("Paris", paris);
+    DISTANCES.put("Paris", paris);
     
     Map<String,Integer> rome = Maps.newHashMapWithExpectedSize(20);
     rome.put("Amsterdam", 1304);
@@ -268,7 +268,7 @@ public final class EuropeanDistanceLooku
     rome.put("Rome", 0);
     rome.put("Stockholm", 1984);
     rome.put("Vienna", 765);
-    EuropeanDistanceLookup.DISTANCES.put("Rome", rome);
+    DISTANCES.put("Rome", rome);
     
     Map<String,Integer> stockholm = Maps.newHashMapWithExpectedSize(20);
     stockholm.put("Amsterdam", 1132);
@@ -286,7 +286,7 @@ public final class EuropeanDistanceLooku
     stockholm.put("Rome", 1984);
     stockholm.put("Stockholm", 0);
     stockholm.put("Vienna", 1247);
-    EuropeanDistanceLookup.DISTANCES.put("Stockholm", stockholm);
+    DISTANCES.put("Stockholm", stockholm);
     
     Map<String,Integer> vienna = Maps.newHashMapWithExpectedSize(20);
     vienna.put("Amsterdam", 938);
@@ -304,18 +304,18 @@ public final class EuropeanDistanceLooku
     vienna.put("Rome", 765);
     vienna.put("Stockholm", 1247);
     vienna.put("Vienna", 0);
-    EuropeanDistanceLookup.DISTANCES.put("Vienna", vienna);
+    DISTANCES.put("Vienna", vienna);
   }
   
   @Override
   public List<String> getKnownCities() {
-    List<String> cities = Lists.newArrayList(EuropeanDistanceLookup.DISTANCES.keySet());
+    List<String> cities = Lists.newArrayList(DISTANCES.keySet());
     Collections.sort(cities);
     return cities;
   }
   
   @Override
   public int getDistance(String startingCity, String destinationCity) {
-    return EuropeanDistanceLookup.DISTANCES.get(startingCity).get(destinationCity);
+    return DISTANCES.get(startingCity).get(destinationCity);
   }
 }

Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/text/WikipediaMapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/text/WikipediaMapper.java?rev=1183642&r1=1183641&r2=1183642&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/text/WikipediaMapper.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/text/WikipediaMapper.java Sat Oct 15 14:08:33 2011
@@ -128,7 +128,8 @@ public class WikipediaMapper extends Map
       String category = document.substring(categoryIndex, endIndex).toLowerCase(Locale.ENGLISH).trim();
       if (exactMatchOnly && inputCategories.contains(category)) {
         return category;
-      } else if (!exactMatchOnly) {
+      }
+      if (!exactMatchOnly) {
         for (String inputCategory : inputCategories) {
           if (category.contains(inputCategory)) { // we have an inexact match
             return inputCategory;

Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/cf/taste/impl/model/jdbc/AbstractJDBCDataModel.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/cf/taste/impl/model/jdbc/AbstractJDBCDataModel.java?rev=1183642&r1=1183641&r2=1183642&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/cf/taste/impl/model/jdbc/AbstractJDBCDataModel.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/cf/taste/impl/model/jdbc/AbstractJDBCDataModel.java Sat Oct 15 14:08:33 2011
@@ -190,8 +190,7 @@ public abstract class AbstractJDBCDataMo
     AbstractJDBCComponent.checkNotNullAndLog("getMinPreferenceSQL", getMinPreferenceSQL);
 
     if (!(dataSource instanceof ConnectionPoolDataSource)) {
-      AbstractJDBCDataModel.log
-          .warn("You are not using ConnectionPoolDataSource. Make sure your DataSource pools connections "
+      log.warn("You are not using ConnectionPoolDataSource. Make sure your DataSource pools connections "
                 + "to the database itself, or database performance will be severely reduced.");
     }
 

Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/cf/taste/impl/model/mongodb/MongoDBDataModel.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/cf/taste/impl/model/mongodb/MongoDBDataModel.java?rev=1183642&r1=1183641&r2=1183642&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/cf/taste/impl/model/mongodb/MongoDBDataModel.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/cf/taste/impl/model/mongodb/MongoDBDataModel.java Sat Oct 15 14:08:33 2011
@@ -708,7 +708,8 @@ public final class MongoDBDataModel impl
   private Date getDate(Object date) {
     if (date.getClass().getName().contains("Date")) {
       return (Date) date;
-    } else if (date.getClass().getName().contains("String")) {
+    }
+    if (date.getClass().getName().contains("String")) {
       try {
         synchronized (dateFormat) {
           return dateFormat.parse(date.toString());

Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/clustering/conversion/InputDriver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/clustering/conversion/InputDriver.java?rev=1183642&r1=1183641&r2=1183642&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/clustering/conversion/InputDriver.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/clustering/conversion/InputDriver.java Sat Oct 15 14:08:33 2011
@@ -84,7 +84,7 @@ public final class InputDriver {
          "org.apache.mahout.math.RandomAccessSparseVector").toString();
       runJob(input, output, vectorClassName);
     } catch (OptionException e) {
-      InputDriver.log.error("Exception parsing command line: ", e);
+      log.error("Exception parsing command line: ", e);
       CommandLineUtil.printHelp(group);
     }
   }

Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/clustering/conversion/InputMapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/clustering/conversion/InputMapper.java?rev=1183642&r1=1183641&r2=1183642&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/clustering/conversion/InputMapper.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/clustering/conversion/InputMapper.java Sat Oct 15 14:08:33 2011
@@ -40,7 +40,7 @@ public class InputMapper extends Mapper<
   @Override
   protected void map(LongWritable key, Text values, Context context) throws IOException, InterruptedException {
 
-    String[] numbers = InputMapper.SPACE.split(values.toString());
+    String[] numbers = SPACE.split(values.toString());
     // sometimes there are multiple separator spaces
     Collection<Double> doubles = Lists.newArrayList();
     for (String value : numbers) {

Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/clustering/conversion/meanshift/InputDriver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/clustering/conversion/meanshift/InputDriver.java?rev=1183642&r1=1183641&r2=1183642&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/clustering/conversion/meanshift/InputDriver.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/clustering/conversion/meanshift/InputDriver.java Sat Oct 15 14:08:33 2011
@@ -72,7 +72,7 @@ public final class InputDriver {
       Path output = new Path(cmdLine.getValue(outputOpt, "output").toString());
       runJob(input, output);
     } catch (OptionException e) {
-      InputDriver.log.error("Exception parsing command line: ", e);
+      log.error("Exception parsing command line: ", e);
       CommandLineUtil.printHelp(group);
     }
   }

Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/clustering/conversion/meanshift/InputMapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/clustering/conversion/meanshift/InputMapper.java?rev=1183642&r1=1183641&r2=1183642&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/clustering/conversion/meanshift/InputMapper.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/clustering/conversion/meanshift/InputMapper.java Sat Oct 15 14:08:33 2011
@@ -38,7 +38,7 @@ public class InputMapper extends Mapper<
 
   @Override
   protected void map(LongWritable key, Text values, Context context) throws IOException, InterruptedException {
-    String[] numbers = InputMapper.SPACE.split(values.toString());
+    String[] numbers = SPACE.split(values.toString());
     // sometimes there are multiple separator spaces
     Collection<Double> doubles = Lists.newArrayList();
     for (String value : numbers) {

Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/clustering/evaluation/ClusterEvaluator.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/clustering/evaluation/ClusterEvaluator.java?rev=1183642&r1=1183641&r2=1183642&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/clustering/evaluation/ClusterEvaluator.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/clustering/evaluation/ClusterEvaluator.java Sat Oct 15 14:08:33 2011
@@ -125,7 +125,7 @@ public class ClusterEvaluator {
     for (Iterator<Cluster> it = clusters.iterator(); it.hasNext();) {
       Cluster cluster = it.next();
       if (invalidCluster(cluster)) {
-        log.info("Pruning cluster Id=" + cluster.getId());
+        log.info("Pruning cluster Id={}", cluster.getId());
         it.remove();
         representativePoints.remove(cluster.getId());
       }
@@ -156,7 +156,7 @@ public class ClusterEvaluator {
       }
     }
     double density = (sum / count - min) / (max - min);
-    log.info("Inter-Cluster Density = " + density);
+    log.info("Inter-Cluster Density = {}", density);
     return density;
   }
 
@@ -186,10 +186,10 @@ public class ClusterEvaluator {
       }
       double density = (sum / count - min) / (max - min);
       avgDensity += density;
-      log.info("Intra-Cluster Density[" + cluster.getId() + "] = " + density);
+      log.info("Intra-Cluster Density[{}] = {}", cluster.getId(), density);
     }
     avgDensity = clusters.isEmpty() ? 0 : avgDensity / clusters.size();
-    log.info("Intra-Cluster Density = " + avgDensity);
+    log.info("Intra-Cluster Density = {}", avgDensity);
     return avgDensity;
 
   }

Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromDirectory.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromDirectory.java?rev=1183642&r1=1183641&r2=1183642&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromDirectory.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromDirectory.java Sat Oct 15 14:08:33 2011
@@ -17,7 +17,6 @@
 
 package org.apache.mahout.text;
 
-import java.io.IOException;
 import java.lang.reflect.Constructor;
 import java.nio.charset.Charset;
 import java.util.Map;
@@ -123,7 +122,7 @@ public class SequenceFilesFromDirectory 
    * Override this method in order to parse your additional options from the command line. Do not forget to call
    * super() otherwise standard options (input/output dirs etc) will not be available.
    */
-  protected Map<String, String> parseOptions() throws IOException {
+  protected Map<String, String> parseOptions() {
     Map<String, String> options = Maps.newHashMap();
     options.put(CHUNK_SIZE_OPTION[0], getOption(CHUNK_SIZE_OPTION[0]));
     options.put(FILE_FILTER_CLASS_OPTION[0], getOption(FILE_FILTER_CLASS_OPTION[0]));

Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/utils/SplitInput.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/utils/SplitInput.java?rev=1183642&r1=1183641&r2=1183642&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/utils/SplitInput.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/utils/SplitInput.java Sat Oct 15 14:08:33 2011
@@ -228,7 +228,8 @@ public class SplitInput {
 
       if (cmdLine.hasOption(testSplitSizeOpt) && cmdLine.hasOption(testSplitPctOpt)) {
         throw new OptionException(testSplitSizeOpt, "must have either split size or split percentage option, not BOTH");
-      } else if (!cmdLine.hasOption(testSplitSizeOpt) && !cmdLine.hasOption(testSplitPctOpt) && !cmdLine.hasOption(randomSelectionPctOpt) && !cmdLine.hasOption(randomSelectionSizeOpt)) {
+      }
+      if (!cmdLine.hasOption(testSplitSizeOpt) && !cmdLine.hasOption(testSplitPctOpt) && !cmdLine.hasOption(randomSelectionPctOpt) && !cmdLine.hasOption(randomSelectionSizeOpt)) {
         throw new OptionException(testSplitSizeOpt, "must set one of test split size/percentage or randomSelectionSize/percentage");
       }
 
@@ -280,7 +281,8 @@ public class SplitInput {
   public void splitDirectory(Path inputDir) throws IOException {
     if (fs.getFileStatus(inputDir) == null) {
       throw new IOException(inputDir + " does not exist");
-    } else if (!fs.getFileStatus(inputDir).isDir()) {
+    }
+    if (!fs.getFileStatus(inputDir).isDir()) {
       throw new IOException(inputDir + " is not a directory");
     }
 
@@ -301,7 +303,8 @@ public class SplitInput {
   public void splitFile(Path inputFile) throws IOException {
     if (fs.getFileStatus(inputFile) == null) {
       throw new IOException(inputFile + " does not exist");
-    } else if (fs.getFileStatus(inputFile).isDir()) {
+    }
+    if (fs.getFileStatus(inputFile).isDir()) {
       throw new IOException(inputFile + " is a directory");
     }
 

Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/utils/clustering/ClusterDumper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/utils/clustering/ClusterDumper.java?rev=1183642&r1=1183641&r2=1183642&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/utils/clustering/ClusterDumper.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/utils/clustering/ClusterDumper.java Sat Oct 15 14:08:33 2011
@@ -50,7 +50,12 @@ import java.util.Map;
 import java.util.TreeMap;
 
 public final class ClusterDumper extends AbstractJob {
-  public enum OUTPUT_FORMAT{TEXT, CSV, GRAPH_ML};
+
+  public enum OUTPUT_FORMAT {
+    TEXT,
+    CSV,
+    GRAPH_ML,
+  }
 
   public static final String OUTPUT_OPTION = "output";
   public static final String DICTIONARY_TYPE_OPTION = "dictionaryType";
@@ -172,25 +177,19 @@ public final class ClusterDumper extends
     }
   }
 
-  protected ClusterWriter createClusterWriter(Writer writer, String[] dictionary) throws IOException {
+  ClusterWriter createClusterWriter(Writer writer, String[] dictionary) throws IOException {
     ClusterWriter result = null;
 
     switch (outputFormat){
-      case TEXT:{
+      case TEXT:
         result = new ClusterDumperWriter(writer, clusterIdToPoints, numTopFeatures, dictionary, subString);
         break;
-      }
-      case CSV:{
+      case CSV:
         result = new CSVClusterWriter(writer, clusterIdToPoints);
         break;
-      }
-      case GRAPH_ML:{
+      case GRAPH_ML:
         result = new GraphMLClusterWriter(writer, clusterIdToPoints);
         break;
-      }
-      default:{
-        break;
-      }
     }
     return result;
   }

Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/utils/clustering/GraphMLClusterWriter.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/utils/clustering/GraphMLClusterWriter.java?rev=1183642&r1=1183641&r2=1183642&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/utils/clustering/GraphMLClusterWriter.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/utils/clustering/GraphMLClusterWriter.java Sat Oct 15 14:08:33 2011
@@ -1,25 +1,43 @@
-package org.apache.mahout.utils.clustering;
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
 
+package org.apache.mahout.utils.clustering;
 
 import org.apache.mahout.clustering.Cluster;
 import org.apache.mahout.clustering.WeightedVectorWritable;
 import org.apache.mahout.math.NamedVector;
 import org.apache.mahout.math.Vector;
 import org.apache.mahout.utils.vectors.io.AbstractClusterWriter;
-import org.apache.mahout.utils.vectors.io.ClusterWriter;
 
 import java.io.IOException;
 import java.io.Writer;
 import java.util.List;
 import java.util.Map;
+import java.util.regex.Pattern;
 
 /**
  * GraphML -- see http://gephi.org/users/supported-graph-formats/graphml-format/
- *
- **/
-public class GraphMLClusterWriter extends AbstractClusterWriter implements ClusterWriter {
+ */
+public class GraphMLClusterWriter extends AbstractClusterWriter {
+
+  private static final Pattern VEC_PATTERN = Pattern.compile("\\{|\\:|\\,|\\}");
 
-  public GraphMLClusterWriter(Writer writer, Map<Integer, List<WeightedVectorWritable>> clusterIdToPoints) throws IOException {
+  public GraphMLClusterWriter(Writer writer, Map<Integer, List<WeightedVectorWritable>> clusterIdToPoints)
+    throws IOException {
     super(writer, clusterIdToPoints);
     writer.append("<?xml version=\"1.0\" encoding=\"UTF-8\"?>");
     writer.append("<graphml xmlns=\"http://graphml.graphdrawing.org/xmlns\"\n" +
@@ -28,25 +46,26 @@ public class GraphMLClusterWriter extend
             "http://graphml.graphdrawing.org/xmlns/1.0/graphml.xsd\">");
     writer.append("<graph edgedefault=\"undirected\">");
   }
-  /*
-<?xml version="1.0" encoding="UTF-8"?>
-<graphml xmlns="http://graphml.graphdrawing.org/xmlns"
-xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
-xsi:schemaLocation="http://graphml.graphdrawing.org/xmlns
-http://graphml.graphdrawing.org/xmlns/1.0/graphml.xsd">
-<graph id="G" edgedefault="undirected">
-<node id="n0"/>
-<node id="n1"/>
-<edge id="e1" source="n0" target="n1"/>
-</graph>
-</graphml>
 
+  /*
+    <?xml version="1.0" encoding="UTF-8"?>
+    <graphml xmlns="http://graphml.graphdrawing.org/xmlns"
+    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+    xsi:schemaLocation="http://graphml.graphdrawing.org/xmlns
+    http://graphml.graphdrawing.org/xmlns/1.0/graphml.xsd">
+    <graph id="G" edgedefault="undirected">
+    <node id="n0"/>
+    <node id="n1"/>
+    <edge id="e1" source="n0" target="n1"/>
+    </graph>
+    </graphml>
    */
+
   @Override
   public void write(Cluster cluster) throws IOException {
     StringBuilder line = new StringBuilder();
     line.append(createNode(String.valueOf(cluster.getId())));
-    List<WeightedVectorWritable> points = clusterIdToPoints.get(cluster.getId());
+    List<WeightedVectorWritable> points = getClusterIdToPoints().get(cluster.getId());
     if (points != null) {
       for (WeightedVectorWritable point : points) {
         Vector theVec = point.getVector();
@@ -57,26 +76,26 @@ http://graphml.graphdrawing.org/xmlns/1.
         } else {
           vecStr = theVec.asFormatString();
           //do some basic manipulations for display
-          vecStr = vecStr.replaceAll("\\{|\\:|\\,|\\}", "_");
+          vecStr = VEC_PATTERN.matcher(vecStr).replaceAll("_");
           line.append(createNode(vecStr));
         }
         line.append(createEdge(String.valueOf(cluster.getId()), vecStr));
       }
-      writer.append(line).append("\n");
+      getWriter().append(line).append("\n");
     }
   }
 
-  private String createEdge(String left, String right) {
-    return "<edge id=\"" + left + "_" + right + "\" source=\"" + left + "\" target=\"" + right + "\"/>";
+  private static String createEdge(String left, String right) {
+    return "<edge id=\"" + left + '_' + right + "\" source=\"" + left + "\" target=\"" + right + "\"/>";
   }
 
-  private String createNode(String s) {
+  private static String createNode(String s) {
     return "<node id=\"" + s + "\"/>";
   }
 
   @Override
   public void close() throws IOException {
-    writer.append("</graph>");
+    getWriter().append("</graph>");
     super.close();
   }
 }

Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/utils/email/MailOptions.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/utils/email/MailOptions.java?rev=1183642&r1=1183641&r2=1183642&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/utils/email/MailOptions.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/utils/email/MailOptions.java Sat Oct 15 14:08:33 2011
@@ -1,4 +1,3 @@
-package org.apache.mahout.utils.email;
 /**
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
@@ -16,32 +15,110 @@ package org.apache.mahout.utils.email;
  * limitations under the License.
  */
 
-
-
+package org.apache.mahout.utils.email;
 
 import java.io.File;
 import java.nio.charset.Charset;
 import java.util.Map;
 import java.util.regex.Pattern;
 
-/**
-*
-*
-**/
 public class MailOptions {
+
   public static final String FROM = "FROM";
   public static final String TO = "TO";
   public static final String REFS = "REFS";
   public static final String SUBJECT = "SUBJECT";
-  public File input;
-  public String outputDir;
-  public String prefix;
-  public int chunkSize;
-  public Charset charset;
-  public String separator;
-  public String bodySeparator = "\n";
-  public boolean includeBody;
-  public Pattern[] patternsToMatch;
+
+  private File input;
+  private String outputDir;
+  private String prefix;
+  private int chunkSize;
+  private Charset charset;
+  private String separator;
+  private String bodySeparator = "\n";
+  private boolean includeBody;
+  private Pattern[] patternsToMatch;
   //maps FROM, TO, REFS, SUBJECT, etc. to the order they appear in patternsToMatch.  See MailToRecMapper
-  public Map<String, Integer> patternOrder;
+  private Map<String, Integer> patternOrder;
+
+  public File getInput() {
+    return input;
+  }
+
+  public void setInput(File input) {
+    this.input = input;
+  }
+
+  public String getOutputDir() {
+    return outputDir;
+  }
+
+  public void setOutputDir(String outputDir) {
+    this.outputDir = outputDir;
+  }
+
+  public String getPrefix() {
+    return prefix;
+  }
+
+  public void setPrefix(String prefix) {
+    this.prefix = prefix;
+  }
+
+  public int getChunkSize() {
+    return chunkSize;
+  }
+
+  public void setChunkSize(int chunkSize) {
+    this.chunkSize = chunkSize;
+  }
+
+  public Charset getCharset() {
+    return charset;
+  }
+
+  public void setCharset(Charset charset) {
+    this.charset = charset;
+  }
+
+  public String getSeparator() {
+    return separator;
+  }
+
+  public void setSeparator(String separator) {
+    this.separator = separator;
+  }
+
+  public String getBodySeparator() {
+    return bodySeparator;
+  }
+
+  public void setBodySeparator(String bodySeparator) {
+    this.bodySeparator = bodySeparator;
+  }
+
+  public boolean isIncludeBody() {
+    return includeBody;
+  }
+
+  public void setIncludeBody(boolean includeBody) {
+    this.includeBody = includeBody;
+  }
+
+  public Pattern[] getPatternsToMatch() {
+    return patternsToMatch;
+  }
+
+  public void setPatternsToMatch(Pattern[] patternsToMatch) {
+    this.patternsToMatch = patternsToMatch;
+  }
+
+  public Map<String, Integer> getPatternOrder() {
+    return patternOrder;
+  }
+
+  public void setPatternOrder(Map<String, Integer> patternOrder) {
+    this.patternOrder = patternOrder;
+  }
+
 }

Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/utils/email/MailProcessor.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/utils/email/MailProcessor.java?rev=1183642&r1=1183641&r2=1183642&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/utils/email/MailProcessor.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/utils/email/MailProcessor.java Sat Oct 15 14:08:33 2011
@@ -1,4 +1,3 @@
-package org.apache.mahout.utils.email;
 /**
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
@@ -16,6 +15,7 @@ package org.apache.mahout.utils.email;
  * limitations under the License.
  */
 
+package org.apache.mahout.utils.email;
 
 import org.apache.mahout.common.iterator.FileLineIterable;
 import org.apache.mahout.utils.io.ChunkedWriter;
@@ -30,10 +30,6 @@ import java.io.Writer;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
-/**
- *
- *
- **/
 public class MailProcessor {
   private static final Pattern MESSAGE_START =
           Pattern.compile("^From \\S+@\\S.*\\d{4}$", Pattern.CASE_INSENSITIVE);
@@ -48,9 +44,9 @@ public class MailProcessor {
                           Pattern.compile("^references: (.*)$", Pattern.CASE_INSENSITIVE);
   public static final Pattern TO_PREFIX =
                                   Pattern.compile("^to: (.*)$", Pattern.CASE_INSENSITIVE);
-  private String prefix;
-  private MailOptions options;
-  private WrappedWriter writer;
+  private final String prefix;
+  private final MailOptions options;
+  private final WrappedWriter writer;
 
   public MailProcessor(MailOptions options, String prefix, Writer writer) {
     this.writer = new IOWriterWrapper(writer);
@@ -70,17 +66,17 @@ public class MailProcessor {
       StringBuilder contents = new StringBuilder();
       // tmps used during mail message parsing
       StringBuilder body = new StringBuilder();
-      String messageId = null;
-      boolean inBody = false;
       Matcher messageIdMatcher = MESSAGE_ID_PREFIX.matcher("");
       Matcher messageBoundaryMatcher = MESSAGE_START.matcher("");
-      String[] patternResults = new String[options.patternsToMatch.length];
-      Matcher[] matchers = new Matcher[options.patternsToMatch.length];
+      String[] patternResults = new String[options.getPatternsToMatch().length];
+      Matcher[] matchers = new Matcher[options.getPatternsToMatch().length];
       for (int i = 0; i < matchers.length; i++) {
-        matchers[i] = options.patternsToMatch[i].matcher("");
+        matchers[i] = options.getPatternsToMatch()[i].matcher("");
       }
 
-      for (String nextLine : new FileLineIterable(mboxFile, options.charset, false)) {
+      String messageId = null;
+      boolean inBody = false;
+      for (String nextLine : new FileLineIterable(mboxFile, options.getCharset(), false)) {
         for (int i = 0; i < matchers.length; i++) {
           Matcher matcher = matchers[i];
           matcher.reset(nextLine);
@@ -97,7 +93,7 @@ public class MailProcessor {
             // done parsing this message ... write it out
             String key = generateKey(mboxFile, prefix, messageId);
             //if this ordering changes, then also change FromEmailToDictionaryMapper
-            writeContent(options.separator, contents, body, patternResults);
+            writeContent(options.getSeparator(), contents, body, patternResults);
             writer.write(key, contents.toString());
             contents.setLength(0); // reset the buffer
             body.setLength(0);
@@ -105,9 +101,9 @@ public class MailProcessor {
             messageId = null;
             inBody = false;
           } else {
-            if (inBody && options.includeBody) {
+            if (inBody && options.isIncludeBody()) {
               if (nextLine.length() > 0) {
-                body.append(nextLine).append(options.bodySeparator);
+                body.append(nextLine).append(options.getBodySeparator());
               }
             } else {
               // first empty line we see after reading the message Id
@@ -128,7 +124,7 @@ public class MailProcessor {
       // write the last message in the file if available
       if (messageId != null) {
         String key = generateKey(mboxFile, prefix, messageId);
-        writeContent(options.separator, contents, body, patternResults);
+        writeContent(options.getSeparator(), contents, body, patternResults);
         writer.write(key, contents.toString());
         contents.setLength(0); // reset the buffer
       }
@@ -139,7 +135,7 @@ public class MailProcessor {
     return messageCount;
   }
 
-  protected String generateKey(File mboxFile, String prefix, String messageId) {
+  protected static String generateKey(File mboxFile, String prefix, String messageId) {
     return prefix + File.separator + mboxFile.getName() + File.separator + messageId;
   }
 
@@ -151,11 +147,10 @@ public class MailProcessor {
     return options;
   }
 
-  private void writeContent(String separator, StringBuilder contents, StringBuilder body, String[] matches) {
-    for (int i = 0; i < matches.length; i++) {
-      String match = matches[i];
+  private static void writeContent(String separator, StringBuilder contents, CharSequence body, String[] matches) {
+    for (String match : matches) {
       contents.append(match).append(separator);
     }
-    contents.append("\n").append(body);
+    contents.append('\n').append(body);
   }
 }

Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/utils/io/ChunkedWrapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/utils/io/ChunkedWrapper.java?rev=1183642&r1=1183641&r2=1183642&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/utils/io/ChunkedWrapper.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/utils/io/ChunkedWrapper.java Sat Oct 15 14:08:33 2011
@@ -1,4 +1,3 @@
-package org.apache.mahout.utils.io;
 /**
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
@@ -16,15 +15,13 @@ package org.apache.mahout.utils.io;
  * limitations under the License.
  */
 
+package org.apache.mahout.utils.io;
 
 import java.io.IOException;
 
-/**
-*
-*
-**/
-public class ChunkedWrapper extends WrappedWriter {
-  ChunkedWriter writer;
+public class ChunkedWrapper implements WrappedWriter {
+
+  private final ChunkedWriter writer;
 
   public ChunkedWrapper(ChunkedWriter writer) {
     this.writer = writer;

Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/utils/io/IOWriterWrapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/utils/io/IOWriterWrapper.java?rev=1183642&r1=1183641&r2=1183642&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/utils/io/IOWriterWrapper.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/utils/io/IOWriterWrapper.java Sat Oct 15 14:08:33 2011
@@ -1,4 +1,3 @@
-package org.apache.mahout.utils.io;
 /**
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
@@ -16,16 +15,14 @@ package org.apache.mahout.utils.io;
  * limitations under the License.
  */
 
+package org.apache.mahout.utils.io;
 
 import java.io.IOException;
 import java.io.Writer;
 
-/**
-*
-*
-**/
-public class IOWriterWrapper extends WrappedWriter {
-  Writer writer;
+public class IOWriterWrapper implements WrappedWriter {
+
+  private final Writer writer;
 
   public IOWriterWrapper(Writer writer) {
     this.writer = writer;
@@ -33,7 +30,7 @@ public class IOWriterWrapper extends Wra
 
   @Override
   public void write(String key, String value) throws IOException {
-    writer.write(key + " " + value);
+    writer.write(key + ' ' + value);
   }
 
   @Override

Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/utils/io/WrappedWriter.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/utils/io/WrappedWriter.java?rev=1183642&r1=1183641&r2=1183642&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/utils/io/WrappedWriter.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/utils/io/WrappedWriter.java Sat Oct 15 14:08:33 2011
@@ -1,4 +1,3 @@
-package org.apache.mahout.utils.io;
 /**
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
@@ -16,17 +15,16 @@ package org.apache.mahout.utils.io;
  * limitations under the License.
  */
 
+package org.apache.mahout.utils.io;
 
 import java.io.Closeable;
 import java.io.IOException;
 
 /**
-* Convenience class for wrapping either a java.io.Writer or a SequenceFile.Writer with some basic functionality
-*
-**/
-public abstract class WrappedWriter implements Closeable {
-  public abstract void write(String key, String value) throws IOException;
+ * Convenience class for wrapping either a java.io.Writer or a SequenceFile.Writer with some basic functionality
+ */
+public interface WrappedWriter extends Closeable {
+
+  void write(String key, String value) throws IOException;
 
-  @Override
-  public abstract void close() throws IOException;
 }

Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/VectorHelper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/VectorHelper.java?rev=1183642&r1=1183641&r2=1183642&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/VectorHelper.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/VectorHelper.java Sat Oct 15 14:08:33 2011
@@ -119,7 +119,7 @@ public final class VectorHelper {
       if (line.startsWith("#")) {
         continue;
       }
-      String[] tokens = VectorHelper.TAB_PATTERN.split(line);
+      String[] tokens = TAB_PATTERN.split(line);
       if (tokens.length < 3) {
         continue;
       }

Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/arff/ARFFVectorIterable.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/arff/ARFFVectorIterable.java?rev=1183642&r1=1183641&r2=1183642&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/arff/ARFFVectorIterable.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/arff/ARFFVectorIterable.java Sat Oct 15 14:08:33 2011
@@ -102,7 +102,7 @@ public class ARFFVectorIterable implemen
           type = ARFFType.NOMINAL;
           //@ATTRIBUTE class        {Iris-setosa,Iris-versicolor,Iris-virginica}
           int classIdx = lower.indexOf(ARFFType.NOMINAL.getIndicator());
-          String [] classes = ARFFVectorIterable.COMMA_PATTERN.split(line.substring(classIdx + 1, line.length() - 1));
+          String[] classes = COMMA_PATTERN.split(line.substring(classIdx + 1, line.length() - 1));
           for (int i = 0; i < classes.length; i++) {
             model.addNominal(label, classes[i].trim(), i);
           }
@@ -113,7 +113,7 @@ public class ARFFVectorIterable implemen
           //TODO: DateFormatter map
           DateFormat format = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss", Locale.ENGLISH);
           int idx = lower.indexOf(ARFFType.DATE.getIndicator());
-          String[] split = ARFFVectorIterable.SPACE_PATTERN.split(line);
+          String[] split = SPACE_PATTERN.split(line);
           if (split.length >= 4) { //we have a date format
             String formStr = line.substring(idx + ARFFType.DATE.getIndicator().length()).trim();
             if (formStr.startsWith("\"")) {

Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/arff/MapBackedARFFModel.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/arff/MapBackedARFFModel.java?rev=1183642&r1=1183641&r2=1183642&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/arff/MapBackedARFFModel.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/arff/MapBackedARFFModel.java Sat Oct 15 14:08:33 2011
@@ -84,7 +84,7 @@ public class MapBackedARFFModel implemen
   @Override
   public double getValue(String data, int idx) {
     ARFFType type = typeMap.get(idx);
-    data = MapBackedARFFModel.QUOTE_PATTERN.matcher(data).replaceAll("");
+    data = QUOTE_PATTERN.matcher(data).replaceAll("");
     data = data.trim();
     double result;
     switch (type) {
@@ -127,7 +127,7 @@ public class MapBackedARFFModel implemen
 
   // Not sure how scalable this is going to be
   protected double processString(String data) {
-    data = MapBackedARFFModel.QUOTE_PATTERN.matcher(data).replaceAll("");
+    data = QUOTE_PATTERN.matcher(data).replaceAll("");
     // map it to an long
     Long theLong = words.get(data);
     if (theLong == null) {