You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by sr...@apache.org on 2012/06/20 14:07:58 UTC

svn commit: r1352052 [5/7] - in /mahout/trunk: ./ buildtools/ buildtools/src/main/resources/ core/ core/src/main/java/org/apache/mahout/cf/taste/hadoop/ core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/ core/src/main/java/org/apache/mahout/cf/t...

Modified: mahout/trunk/core/src/test/java/org/apache/mahout/common/DummyRecordWriter.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/common/DummyRecordWriter.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/common/DummyRecordWriter.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/common/DummyRecordWriter.java Wed Jun 20 12:07:50 2012
@@ -19,7 +19,6 @@ package org.apache.mahout.common;
 
 import com.google.common.collect.Lists;
 
-import java.io.IOException;
 import java.lang.reflect.Constructor;
 import java.lang.reflect.Method;
 import java.util.List;
@@ -68,8 +67,7 @@ public final class DummyRecordWriter<K, 
 
   public static <K1, V1, K2, V2> Mapper<K1, V1, K2, V2>.Context build(Mapper<K1, V1, K2, V2> mapper,
                                                                       Configuration configuration,
-                                                                      RecordWriter<K2, V2> output)
-    throws IOException, InterruptedException {
+                                                                      RecordWriter<K2, V2> output) {
 
     // Use reflection since the context types changed incompatibly between 0.20
     // and 0.23.
@@ -88,8 +86,7 @@ public final class DummyRecordWriter<K, 
                                                                        Configuration configuration,
                                                                        RecordWriter<K2, V2> output,
                                                                        Class<K1> keyClass,
-                                                                       Class<V1> valueClass)
-    throws IOException, InterruptedException {
+                                                                       Class<V1> valueClass) {
 
     // Use reflection since the context types changed incompatibly between 0.20
     // and 0.23.
@@ -113,7 +110,7 @@ public final class DummyRecordWriter<K, 
         new TaskAttemptID(), null, output, null, new DummyStatusReporter(), null);
 
     Class<?> wrappedMapperClass = Class.forName("org.apache.hadoop.mapreduce.lib.map.WrappedMapper");
-    Object wrappedMapper = wrappedMapperClass.newInstance();
+    Object wrappedMapper = wrappedMapperClass.getConstructor().newInstance();
     Method getMapContext = wrappedMapperClass.getMethod("getMapContext", MapContext.class);
     return (Mapper.Context) getMapContext.invoke(wrappedMapper, mapContextImpl);
   }
@@ -147,7 +144,7 @@ public final class DummyRecordWriter<K, 
       valueClass);
 
     Class<?> wrappedReducerClass = Class.forName("org.apache.hadoop.mapreduce.lib.reduce.WrappedReducer");
-    Object wrappedReducer = wrappedReducerClass.newInstance();
+    Object wrappedReducer = wrappedReducerClass.getConstructor().newInstance();
     Method getReducerContext = wrappedReducerClass.getMethod("getReducerContext", ReduceContext.class);
     return (Reducer.Context) getReducerContext.invoke(wrappedReducer, reduceContextImpl);
   }

Modified: mahout/trunk/core/src/test/java/org/apache/mahout/common/DummyStatusReporter.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/common/DummyStatusReporter.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/common/DummyStatusReporter.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/common/DummyStatusReporter.java Wed Jun 20 12:07:50 2012
@@ -32,7 +32,7 @@ public final class DummyStatusReporter e
   private final Map<Enum<?>, Counter> counters = Maps.newHashMap();
   private final Map<String, Counter> counterGroups = Maps.newHashMap();
 
-  private Counter newCounter() {
+  private static Counter newCounter() {
     try {
       // 0.23 case
       String c = "org.apache.hadoop.mapreduce.counters.GenericCounter";
@@ -69,7 +69,7 @@ public final class DummyStatusReporter e
   }
 
   public float getProgress() {
-    return 0;
+    return 0.0f;
   }
 
 }

Modified: mahout/trunk/core/src/test/java/org/apache/mahout/common/StringUtilsTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/common/StringUtilsTest.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/common/StringUtilsTest.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/common/StringUtilsTest.java Wed Jun 20 12:07:50 2012
@@ -65,6 +65,6 @@ public final class StringUtilsTest exten
   @Test
   public void testEscape() throws Exception {
     String res = StringUtils.escapeXML("\",\',&,>,<");
-    assertTrue(res.equals("_,_,_,_,_"));
+    assertEquals("_,_,_,_,_", res);
   }
 }

Modified: mahout/trunk/core/src/test/java/org/apache/mahout/fpm/pfpgrowth/FPGrowthRetailDataTestVs.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/fpm/pfpgrowth/FPGrowthRetailDataTestVs.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/fpm/pfpgrowth/FPGrowthRetailDataTestVs.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/fpm/pfpgrowth/FPGrowthRetailDataTestVs.java Wed Jun 20 12:07:50 2012
@@ -18,6 +18,7 @@
 package org.apache.mahout.fpm.pfpgrowth;
 
 import java.io.IOException;
+import java.util.Collection;
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.List;
@@ -43,17 +44,18 @@ public final class FPGrowthRetailDataTes
 
   private static final Logger log = LoggerFactory.getLogger(PFPGrowthRetailDataTestVs.class);
 
-  private long bestResults(Map<Set<String>, Long> res, Set<String> feats) {
+  private static long bestResults(Map<Set<String>, Long> res, Set<String> feats) {
     Long best = res.get(feats);
-    if (best != null) 
+    if (best != null) {
       return best;
-    else 
-      best = -1L;
+    }
+    best = -1L;
     for (Map.Entry<Set<String>, Long> ent : res.entrySet()) { 
       Set<String> r = ent.getKey();
       Long supp = ent.getValue();
-      if (supp <= best) 
+      if (supp <= best) {
         continue;
+      }
       boolean hasAll = true;
       for (String f : feats) {
         if (!r.contains(f)) {
@@ -61,16 +63,17 @@ public final class FPGrowthRetailDataTes
           break;
         }
       }
-      if (hasAll) 
+      if (hasAll) {
         best = supp;
+      }
     }
     return best;
   }
 
   private static class MapCollector implements OutputCollector<String,List<Pair<List<String>,Long>>> {
-    private Map<Set<String>,Long> results;
+    private final Map<Set<String>,Long> results;
 
-    public MapCollector(Map<Set<String>,Long> results) {
+    private MapCollector(Map<Set<String>, Long> results) {
       this.results = results;
     }
 
@@ -84,7 +87,7 @@ public final class FPGrowthRetailDataTes
     }
   }
 
-  private class DummyUpdater implements StatusUpdater {
+  private static class DummyUpdater implements StatusUpdater {
     @Override
     public void update(String status) { }
   }
@@ -93,7 +96,7 @@ public final class FPGrowthRetailDataTes
   public void testVsWithRetailData() throws IOException {
     String inputFilename = "retail.dat";
     int minSupport = 500;
-    Set<String> returnableFeatures = new HashSet<String>();
+    Collection<String> returnableFeatures = new HashSet<String>();
     
     org.apache.mahout.fpm.pfpgrowth.fpgrowth.
       FPGrowth<String> fp1 = new org.apache.mahout.fpm.pfpgrowth.fpgrowth.FPGrowth<String>();
@@ -118,8 +121,10 @@ public final class FPGrowthRetailDataTes
       new HashSet<String>(),
       new MapCollector(initialResults2), new DummyUpdater());
 
-    Map<Set<String>, Long> results2 = new HashMap<Set<String>, Long>();    
-    if (!returnableFeatures.isEmpty()) {
+    Map<Set<String>, Long> results2;
+    if (returnableFeatures.isEmpty()) {
+      results2 = initialResults2;
+    } else {
       Map<Set<String>, Long> tmpResult = new HashMap<Set<String>, Long>();
       for (Map.Entry<Set<String>, Long> result2 : initialResults2.entrySet()) {
         Set<String> r2feats = result2.getKey();
@@ -130,25 +135,23 @@ public final class FPGrowthRetailDataTes
             break;
           }
         }
-        if (hasSome) 
+        if (hasSome) {
           tmpResult.put(result2.getKey(), result2.getValue());
+        }
       }
       results2 = tmpResult;
-    } else {
-      results2 = initialResults2;
-  }
+    }
 
-    boolean allMatch = true;
-    allMatch &= hasAll(results1, results2);
+    boolean allMatch = hasAll(results1, results2);
     log.info("checked "+results1.size()+" itemsets iterating through #1");
 
     allMatch &= hasAll(results2, results1);
     log.info("checked "+results2.size()+" itemsets iterating through #2");
 
-    assertEquals( "Had mismatches!", allMatch, true);
+    assertTrue("Had mismatches!", allMatch);
   }
 
-  public boolean hasAll(Map<Set<String>, Long> ref, Map<Set<String>, Long> other) {
+  public static boolean hasAll(Map<Set<String>, Long> ref, Map<Set<String>, Long> other) {
     boolean hasAll = true;
     for (Map.Entry<Set<String>, Long> refEnt : ref.entrySet()) {
       Set<String> feats = refEnt.getKey();

Modified: mahout/trunk/core/src/test/java/org/apache/mahout/fpm/pfpgrowth/FPGrowthSyntheticDataTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/fpm/pfpgrowth/FPGrowthSyntheticDataTest.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/fpm/pfpgrowth/FPGrowthSyntheticDataTest.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/fpm/pfpgrowth/FPGrowthSyntheticDataTest.java Wed Jun 20 12:07:50 2012
@@ -18,6 +18,7 @@
 package org.apache.mahout.fpm.pfpgrowth;
 
 import java.io.IOException;
+import java.util.Collection;
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.List;
@@ -44,10 +45,9 @@ public final class FPGrowthSyntheticData
     FPGrowthObj<String> fp = new FPGrowthObj<String>();
     
     String inputFilename = "FPGsynth.dat";
-    int minSupport = 50;
 
-    StringRecordIterator it = new StringRecordIterator(new FileLineIterable(Resources.getResource(
-                                                                                                  inputFilename).openStream()), "\\s+");
+    StringRecordIterator it =
+        new StringRecordIterator(new FileLineIterable(Resources.getResource(inputFilename).openStream()), "\\s+");
     int patternCnt_10_13_1669 = 0;
     int patternCnt_10_13 = 0;
     while (it.hasNext()) {
@@ -60,11 +60,13 @@ public final class FPGrowthSyntheticData
         }
       }
     }
-    
-    if (patternCnt_10_13_1669 < minSupport) 
+
+    int minSupport = 50;
+    if (patternCnt_10_13_1669 < minSupport) {
       throw new IllegalStateException("the test is broken or data is missing ("
-                                      + patternCnt_10_13_1669+", "
-                                      + patternCnt_10_13+")");
+                                          + patternCnt_10_13_1669 + ", "
+                                          + patternCnt_10_13 + ')');
+    }
 
     final Map<Set<String>,Long> results = Maps.newHashMap();
     
@@ -77,8 +79,7 @@ public final class FPGrowthSyntheticData
     returnableFeatures.add("13");
     returnableFeatures.add("1669");
     
-    fp.generateTopKFrequentPatterns(
-                                    new StringRecordIterator(new FileLineIterable(Resources.getResource(inputFilename).openStream()), "\\s+"),
+    fp.generateTopKFrequentPatterns(new StringRecordIterator(new FileLineIterable(Resources.getResource(inputFilename).openStream()), "\\s+"),
 
                                     fp.generateFList(new StringRecordIterator(new FileLineIterable(Resources.getResource(inputFilename)
                                                                                                    .openStream()), "\\s+"), minSupport), minSupport, 100000, 
@@ -106,17 +107,18 @@ public final class FPGrowthSyntheticData
     
   }
 
-  private long highestSupport(Map<Set<String>, Long> res, Set<String> feats) {
+  private static long highestSupport(Map<Set<String>, Long> res, Set<String> feats) {
     Long best= res.get(feats);
-    if (best != null) 
+    if (best != null) {
       return best;
-    else 
-      best= -1L;
-    for (Map.Entry<Set<String>, Long> ent : res.entrySet()) { 
+    }
+    best = -1L;
+    for (Map.Entry<Set<String>, Long> ent : res.entrySet()) {
       Set<String> r= ent.getKey();
       Long supp= ent.getValue();
-      if (supp <= best) 
+      if (supp <= best) {
         continue;
+      }
       boolean hasAll= true;
       for (String f : feats) {
         if (!r.contains(f)) {
@@ -124,17 +126,16 @@ public final class FPGrowthSyntheticData
           break;
         }
       }
-      if (hasAll) 
-        best= supp;
+      if (hasAll) {
+        best = supp;
+      }
     }
     return best;
   }
 
   @Test
-    public void testVsWithSynthData() throws IOException {
-    String inputFilename= "FPGsynth.dat";
-    int minSupport= 100;
-    Set<String> returnableFeatures = new HashSet<String>();
+  public void testVsWithSynthData() throws IOException {
+    Collection<String> returnableFeatures = new HashSet<String>();
 
     // not limiting features (or including too many) can cause
     // the test to run a very long time
@@ -145,9 +146,10 @@ public final class FPGrowthSyntheticData
     FPGrowth<String> fp1 = new FPGrowth<String>();
 
     final Map<Set<String>,Long> results1 = Maps.newHashMap();
-    
-    fp1.generateTopKFrequentPatterns(
-                                     new StringRecordIterator(new FileLineIterable(Resources.getResource(inputFilename).openStream()), "\\s+"),
+
+    String inputFilename = "FPGsynth.dat";
+    int minSupport = 100;
+    fp1.generateTopKFrequentPatterns(new StringRecordIterator(new FileLineIterable(Resources.getResource(inputFilename).openStream()), "\\s+"),
 
                                      fp1.generateFList(new StringRecordIterator(new FileLineIterable(Resources.getResource(inputFilename)
                                                                                                      .openStream()), "\\s+"), minSupport), minSupport, 1000000, 
@@ -172,8 +174,7 @@ public final class FPGrowthSyntheticData
 
     FPGrowthObj<String> fp2 = new FPGrowthObj<String>();
     final Map<Set<String>,Long> initialResults2 = Maps.newHashMap();
-    fp2.generateTopKFrequentPatterns(
-                                     new StringRecordIterator(new FileLineIterable(Resources.getResource(inputFilename).openStream()), "\\s+"),
+    fp2.generateTopKFrequentPatterns(new StringRecordIterator(new FileLineIterable(Resources.getResource(inputFilename).openStream()), "\\s+"),
 
                                      fp2.generateFList(new StringRecordIterator(new FileLineIterable(Resources.getResource(inputFilename)
                                                                                                      .openStream()), "\\s+"), minSupport), minSupport, 1000000, 
@@ -196,27 +197,28 @@ public final class FPGrowthSyntheticData
                                            public void update(String status) {}
                                        });
 
-    Map<Set<String>, Long> results2= new HashMap<Set<String>, Long>();    
-    if (!returnableFeatures.isEmpty()) {
-      Map<Set<String>, Long> tmpResult= new HashMap<Set<String>, Long>();
+    Map<Set<String>, Long> results2;
+    if (returnableFeatures.isEmpty()) {
+      results2 = initialResults2;
+    } else {
+      Map<Set<String>, Long> tmpResult = new HashMap<Set<String>, Long>();
       for (Map.Entry<Set<String>, Long> result2 : initialResults2.entrySet()) {
-        Set<String> r2feats= result2.getKey();
-        boolean hasSome= false;
+        Set<String> r2feats = result2.getKey();
+        boolean hasSome = false;
         for (String rf : returnableFeatures) {
           if (r2feats.contains(rf)) {
-            hasSome= true;
+            hasSome = true;
             break;
           }
         }
-        if (hasSome) 
+        if (hasSome) {
           tmpResult.put(result2.getKey(), result2.getValue());
+        }
       }
-      results2= tmpResult;
-    } else {
-      results2= initialResults2;
+      results2 = tmpResult;
     }
 
-    boolean allMatch= true;
+    boolean allMatch = true;
     int itemsetsChecked= 0;
     for (Map.Entry<Set<String>, Long> result1 : results1.entrySet()) {
       itemsetsChecked++;
@@ -243,7 +245,7 @@ public final class FPGrowthSyntheticData
     }
     System.out.println("checked "+itemsetsChecked+" itemsets iterating through #2");
 
-    assertEquals("Had mismatches!", allMatch, true);
+    assertTrue("Had mismatches!", allMatch);
   }
 
 }

Modified: mahout/trunk/core/src/test/java/org/apache/mahout/fpm/pfpgrowth/PFPGrowthRetailDataTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/fpm/pfpgrowth/PFPGrowthRetailDataTest.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/fpm/pfpgrowth/PFPGrowthRetailDataTest.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/fpm/pfpgrowth/PFPGrowthRetailDataTest.java Wed Jun 20 12:07:50 2012
@@ -124,7 +124,7 @@ public class PFPGrowthRetailDataTest ext
       } else {
         if (!expectedResults.get(key).equals(results.get(entry.getKey()))) {
           System.out.println("invalid (1): " + key + ", expected: " + expectedResults.get(key) + ", got: "
-                             +                             + results.get(entry.getKey()));
+                             + results.get(entry.getKey()));
         } else {
           System.out.println("matched (1): " + key + ", with: " + expectedResults.get(key));
         }
@@ -165,8 +165,9 @@ public class PFPGrowthRetailDataTest ext
     int numGroups = params.getInt(PFPGrowth.NUM_GROUPS, 
                                   PFPGrowth.NUM_GROUPS_DEFAULT);
     int maxPerGroup = fList.size() / numGroups;
-    if (fList.size() % numGroups != 0) 
+    if (fList.size() % numGroups != 0) {
       maxPerGroup++;
+    }
     params.set(PFPGrowth.MAX_PER_GROUP, Integer.toString(maxPerGroup));
 
     PFPGrowth.startParallelFPGrowth(params, conf);

Modified: mahout/trunk/core/src/test/java/org/apache/mahout/fpm/pfpgrowth/PFPGrowthRetailDataTest2.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/fpm/pfpgrowth/PFPGrowthRetailDataTest2.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/fpm/pfpgrowth/PFPGrowthRetailDataTest2.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/fpm/pfpgrowth/PFPGrowthRetailDataTest2.java Wed Jun 20 12:07:50 2012
@@ -165,8 +165,9 @@ public class PFPGrowthRetailDataTest2 ex
     int numGroups = params.getInt(PFPGrowth.NUM_GROUPS, 
                                   PFPGrowth.NUM_GROUPS_DEFAULT);
     int maxPerGroup = fList.size() / numGroups;
-    if (fList.size() % numGroups != 0) 
+    if (fList.size() % numGroups != 0) {
       maxPerGroup++;
+    }
     params.set(PFPGrowth.MAX_PER_GROUP, Integer.toString(maxPerGroup));
 
     log.info("Starting Parallel FPGrowth Test: {}", params.get(PFPGrowth.MAX_HEAPSIZE));

Modified: mahout/trunk/core/src/test/java/org/apache/mahout/fpm/pfpgrowth/PFPGrowthRetailDataTestVs.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/fpm/pfpgrowth/PFPGrowthRetailDataTestVs.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/fpm/pfpgrowth/PFPGrowthRetailDataTestVs.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/fpm/pfpgrowth/PFPGrowthRetailDataTestVs.java Wed Jun 20 12:07:50 2012
@@ -136,7 +136,7 @@ public final class PFPGrowthRetailDataTe
       } else {
         if (!results2.get(key).equals(results1.get(entry.getKey()))) {
           System.out.println("invalid (1): " + key + ", expected: " + results2.get(key) + ", got: "
-                             +                             + results1.get(entry.getKey()));
+                             + results1.get(entry.getKey()));
         } else {
           System.out.println("matched (1): " + key + ", with: " + results2.get(key));
         }

Modified: mahout/trunk/core/src/test/java/org/apache/mahout/fpm/pfpgrowth/PFPGrowthSynthDataTest2.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/fpm/pfpgrowth/PFPGrowthSynthDataTest2.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/fpm/pfpgrowth/PFPGrowthSynthDataTest2.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/fpm/pfpgrowth/PFPGrowthSynthDataTest2.java Wed Jun 20 12:07:50 2012
@@ -94,7 +94,7 @@ public class PFPGrowthSynthDataTest2 ext
   @Test
   public void testVsSequential() throws Exception {
 
-    final Map<Set<String>,Long> parallelResult = Maps.newHashMap();
+    Map<Set<String>,Long> parallelResult = Maps.newHashMap();
 
     PFPGrowth.runPFPGrowth(params);
     List<Pair<String,TopKStringPatterns>> tmpParallel = PFPGrowth.readFrequentPattern(params);
@@ -107,8 +107,6 @@ public class PFPGrowthSynthDataTest2 ext
       }
     }
 
-    //////
-
     String inputFilename= "FPGsynth.dat";
     int minSupport= 100;
 
@@ -143,11 +141,11 @@ public class PFPGrowthSynthDataTest2 ext
       if (seqResult.get(key) == null) {
         log.info("spurious (1): " + key+ " with " +entry.getValue());
       } else {
-        if (!seqResult.get(key).equals(parallelResult.get(entry.getKey()))) {
-          log.info("invalid (1): " + key + ", expected: " + seqResult.get(key) + ", got: "
-                             +                             + parallelResult.get(entry.getKey()));
-        } else {
+        if (seqResult.get(key).equals(parallelResult.get(entry.getKey()))) {
           log.info("matched (1): " + key + ", with: " + seqResult.get(key));
+        } else {
+          log.info("invalid (1): " + key + ", expected: " + seqResult.get(key) + ", got: "
+                       + parallelResult.get(entry.getKey()));
         }
       }
     }

Modified: mahout/trunk/core/src/test/java/org/apache/mahout/fpm/pfpgrowth/PFPGrowthTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/fpm/pfpgrowth/PFPGrowthTest.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/fpm/pfpgrowth/PFPGrowthTest.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/fpm/pfpgrowth/PFPGrowthTest.java Wed Jun 20 12:07:50 2012
@@ -114,8 +114,9 @@ public final class PFPGrowthTest extends
     int numGroups = params.getInt(PFPGrowth.NUM_GROUPS, 
                                   PFPGrowth.NUM_GROUPS_DEFAULT);
     int maxPerGroup = fList.size() / numGroups;
-    if (fList.size() % numGroups != 0) 
+    if (fList.size() % numGroups != 0) {
       maxPerGroup++;
+    }
     params.set(PFPGrowth.MAX_PER_GROUP, Integer.toString(maxPerGroup));
 
     log.info("Starting Parallel FPGrowth Test: {}", params.get(PFPGrowth.MAX_HEAPSIZE));

Modified: mahout/trunk/core/src/test/java/org/apache/mahout/fpm/pfpgrowth/PFPGrowthTest2.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/fpm/pfpgrowth/PFPGrowthTest2.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/fpm/pfpgrowth/PFPGrowthTest2.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/fpm/pfpgrowth/PFPGrowthTest2.java Wed Jun 20 12:07:50 2012
@@ -115,8 +115,9 @@ public final class PFPGrowthTest2 extend
     int numGroups = params.getInt(PFPGrowth.NUM_GROUPS, 
                                   PFPGrowth.NUM_GROUPS_DEFAULT);
     int maxPerGroup = fList.size() / numGroups;
-    if (fList.size() % numGroups != 0) 
+    if (fList.size() % numGroups != 0) {
       maxPerGroup++;
+    }
     params.set(PFPGrowth.MAX_PER_GROUP, Integer.toString(maxPerGroup));
 
     log.info("Starting Parallel FPGrowth Test: {}", params.get(PFPGrowth.MAX_HEAPSIZE));

Modified: mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/decomposer/TestDistributedLanczosSolver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/decomposer/TestDistributedLanczosSolver.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/decomposer/TestDistributedLanczosSolver.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/decomposer/TestDistributedLanczosSolver.java Wed Jun 20 12:07:50 2012
@@ -70,7 +70,7 @@ public final class TestDistributedLanczo
     Configuration conf = new Configuration();
     corpus.setConf(conf);
     DistributedLanczosSolver solver = new DistributedLanczosSolver();
-    Vector intitialVector = solver.getInitialVector(corpus);
+    Vector intitialVector = DistributedLanczosSolver.getInitialVector(corpus);
     LanczosState state;
     if (hdfsBackedState) {
       HdfsBackedLanczosState hState = new HdfsBackedLanczosState(corpus,
@@ -96,7 +96,7 @@ public final class TestDistributedLanczo
     corpus.setConf(conf);
     DistributedLanczosSolver solver = new DistributedLanczosSolver();
     int rank = 10;
-    Vector intitialVector = solver.getInitialVector(corpus);
+    Vector intitialVector = DistributedLanczosSolver.getInitialVector(corpus);
     HdfsBackedLanczosState state = new HdfsBackedLanczosState(corpus, rank,
         intitialVector, new Path(getTestTempDirPath(), "lanczosStateDir" + suf(symmetric) + counter));
     solver.solve(state, rank, symmetric);

Modified: mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/decomposer/TestDistributedLanczosSolverCLI.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/decomposer/TestDistributedLanczosSolverCLI.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/decomposer/TestDistributedLanczosSolverCLI.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/decomposer/TestDistributedLanczosSolverCLI.java Wed Jun 20 12:07:50 2012
@@ -159,11 +159,9 @@ public final class TestDistributedLanczo
       }
       for (int newRow = 0; newRow < eigenVectors2.numRows(); newRow++) {
         Vector newEigen = eigenVectors2.viewRow(newRow);
-        if (newEigen != null) {
-          if (oldEigen.dot(newEigen) > 0.9) {
-            oldEigensFound.add(row);
-            break;
-          }
+        if (newEigen != null && oldEigen.dot(newEigen) > 0.9) {
+          oldEigensFound.add(row);
+          break;
         }
       }
     }

Modified: mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/LocalSSVDPCADenseTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/LocalSSVDPCADenseTest.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/LocalSSVDPCADenseTest.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/LocalSSVDPCADenseTest.java Wed Jun 20 12:07:50 2012
@@ -31,6 +31,7 @@ import org.apache.hadoop.io.IntWritable;
 import org.apache.hadoop.io.SequenceFile;
 import org.apache.hadoop.io.SequenceFile.CompressionType;
 import org.apache.hadoop.io.compress.DefaultCodec;
+import org.apache.mahout.common.IOUtils;
 import org.apache.mahout.common.MahoutTestCase;
 import org.apache.mahout.common.RandomUtils;
 import org.apache.mahout.math.DenseMatrix;
@@ -104,6 +105,7 @@ public class LocalSSVDPCADenseTest exten
     closeables.remove(w);
     Closeables.close(w, true);
 
+    // TODO fix test so that 1.0/m works as intended!
     xi.assign(Functions.mult(1 / m));
 
     FileSystem fs = FileSystem.get(conf);
@@ -158,9 +160,11 @@ public class LocalSSVDPCADenseTest exten
     double[][] a = SSVDHelper.loadDistributedRowMatrix(fs, aPath, conf);
 
     // subtract pseudo pca mean
-    for (int i = 0; i < m; i++)
-      for (int j = 0; j < n; j++)
+    for (int i = 0; i < m; i++) {
+      for (int j = 0; j < n; j++) {
         a[i][j] -= xi.getQuick(j);
+      }
+    }
 
     SingularValueDecomposition svd2 =
       new SingularValueDecomposition(new DenseMatrix(a));
@@ -180,6 +184,7 @@ public class LocalSSVDPCADenseTest exten
                                            false,
                                            s_epsilon);
 
+    IOUtils.close(closeables);
   }
 
 }

Modified: mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/LocalSSVDSolverSparseSequentialTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/LocalSSVDSolverSparseSequentialTest.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/LocalSSVDSolverSparseSequentialTest.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/LocalSSVDSolverSparseSequentialTest.java Wed Jun 20 12:07:50 2012
@@ -31,6 +31,7 @@ import org.apache.hadoop.io.IntWritable;
 import org.apache.hadoop.io.SequenceFile;
 import org.apache.hadoop.io.SequenceFile.CompressionType;
 import org.apache.hadoop.io.compress.DefaultCodec;
+import org.apache.mahout.common.IOUtils;
 import org.apache.mahout.common.MahoutTestCase;
 import org.apache.mahout.common.RandomUtils;
 import org.apache.mahout.math.DenseMatrix;
@@ -184,22 +185,7 @@ public class LocalSSVDSolverSparseSequen
                                            false,
                                            s_epsilon);
 
-    /*
-     * removing tests on U and V to keep this test leaner. I will keep U,V
-     * computation and assertions in the dense tests though.
-     */
-
-    /*
-     * double[][] u = SSVDSolver.loadDistributedRowMatrix(fs, new
-     * Path(svdOutPath, "U/[^_]*"), conf);
-     * 
-     * SSVDPrototypeTest .assertOrthonormality(new DenseMatrix(u), false,
-     * s_epsilon); double[][] v = SSVDSolver.loadDistributedRowMatrix(fs, new
-     * Path(svdOutPath, "V/[^_]*"), conf);
-     * 
-     * SSVDPrototypeTest .assertOrthonormality(new DenseMatrix(v), false,
-     * s_epsilon);
-     */
+    IOUtils.close(closeables);
   }
 
   static void dumpSv(Vector s) {

Modified: mahout/trunk/core/src/test/java/org/apache/mahout/math/ssvd/SequentialOutOfCoreSvdTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/math/ssvd/SequentialOutOfCoreSvdTest.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/math/ssvd/SequentialOutOfCoreSvdTest.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/math/ssvd/SequentialOutOfCoreSvdTest.java Wed Jun 20 12:07:50 2012
@@ -148,12 +148,12 @@ public final class SequentialOutOfCoreSv
     assertEquals(u1, u2);
   }
 
-  private Matrix lowRankMatrixInMemory(int rows, int columns) throws IOException {
+  private static Matrix lowRankMatrixInMemory(int rows, int columns) throws IOException {
     return lowRankMatrix(null, null, 0, rows, columns);
   }
 
-  private void assertEquals(Matrix u1, Matrix u2) {
-    assertEquals(0.0, u1.minus(u2).aggregate(Functions.MAX, Functions.ABS), 1e-10);
+  private static void assertEquals(Matrix u1, Matrix u2) {
+    assertEquals(0.0, u1.minus(u2).aggregate(Functions.MAX, Functions.ABS), 1.0e-10);
   }
 
   @Test

Modified: mahout/trunk/core/src/test/java/org/apache/mahout/vectorizer/SparseVectorsFromSequenceFilesTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/vectorizer/SparseVectorsFromSequenceFilesTest.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/vectorizer/SparseVectorsFromSequenceFilesTest.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/vectorizer/SparseVectorsFromSequenceFilesTest.java Wed Jun 20 12:07:50 2012
@@ -194,7 +194,7 @@ public class SparseVectorsFromSequenceFi
     Path tfidfVectors = new Path(outputPath, "tfidf-vectors");
     
     DictionaryVectorizerTest.validateVectors(conf, numDocs, tfVectors, sequential, named);
-    if (tfWeighting == false) {
+    if (!tfWeighting) {
       DictionaryVectorizerTest.validateVectors(conf, numDocs, tfidfVectors, sequential, named);
     }
     return outputPath;

Modified: mahout/trunk/core/src/test/java/org/apache/mahout/vectorizer/collocations/llr/GramKeyGroupComparatorTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/vectorizer/collocations/llr/GramKeyGroupComparatorTest.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/vectorizer/collocations/llr/GramKeyGroupComparatorTest.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/vectorizer/collocations/llr/GramKeyGroupComparatorTest.java Wed Jun 20 12:07:50 2012
@@ -38,8 +38,8 @@ public final class GramKeyGroupComparato
 
     assertEquals(0, cmp.compare(a, b));
     assertEquals(0, cmp.compare(a, c));
-    assertTrue(0 > cmp.compare(a, d));
-    assertTrue(0 < cmp.compare(a, e));
-    assertTrue(0 < cmp.compare(d, e));
+    assertTrue(cmp.compare(a, d) < 0);
+    assertTrue(cmp.compare(a, e) > 0);
+    assertTrue(cmp.compare(d, e) > 0);
   }
 }

Modified: mahout/trunk/distribution/pom.xml
URL: http://svn.apache.org/viewvc/mahout/trunk/distribution/pom.xml?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/distribution/pom.xml (original)
+++ mahout/trunk/distribution/pom.xml Wed Jun 20 12:07:50 2012
@@ -38,6 +38,7 @@
       <plugin>
         <groupId>org.apache.maven.plugins</groupId>
         <artifactId>maven-assembly-plugin</artifactId>
+        <version>2.3</version>
         <executions>
           <execution>
             <id>bin-assembly</id>

Modified: mahout/trunk/examples/pom.xml
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/pom.xml?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/examples/pom.xml (original)
+++ mahout/trunk/examples/pom.xml Wed Jun 20 12:07:50 2012
@@ -40,6 +40,7 @@
       <plugin>
         <groupId>org.apache.maven.plugins</groupId>
         <artifactId>maven-compiler-plugin</artifactId>
+        <version>2.4</version>
         <configuration>
           <encoding>UTF-8</encoding>
           <source>1.6</source>
@@ -50,6 +51,7 @@
       <plugin>
         <groupId>org.apache.maven.plugins</groupId>
         <artifactId>maven-dependency-plugin</artifactId>
+        <version>2.4</version>
         <executions>
           <execution>
             <id>copy-dependencies</id>
@@ -68,6 +70,7 @@
       <plugin>
         <groupId>org.apache.maven.plugins</groupId>
         <artifactId>maven-assembly-plugin</artifactId>
+        <version>2.3</version>
         <executions>
           <execution>
             <id>job</id>
@@ -89,6 +92,7 @@
       <plugin>
         <groupId>org.apache.maven.plugins</groupId>
         <artifactId>maven-remote-resources-plugin</artifactId>
+        <version>1.3</version>
         <configuration>
           <appendedResourcesDirectory>../src/main/appended-resources</appendedResourcesDirectory>
           <resourceBundles>

Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/df/mapreduce/BuildForest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/df/mapreduce/BuildForest.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/df/mapreduce/BuildForest.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/df/mapreduce/BuildForest.java Wed Jun 20 12:07:50 2012
@@ -74,8 +74,7 @@ public class BuildForest extends Configu
   private boolean isPartial; // use partial data implementation
 
   @Override
-  public int run(String[] args) throws IOException, ClassNotFoundException, InterruptedException,
-    InstantiationException, IllegalAccessException {
+  public int run(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
     
     DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
     ArgumentBuilder abuilder = new ArgumentBuilder();

Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/df/mapreduce/TestForest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/df/mapreduce/TestForest.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/df/mapreduce/TestForest.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/df/mapreduce/TestForest.java Wed Jun 20 12:07:50 2012
@@ -19,6 +19,7 @@ package org.apache.mahout.classifier.df.
 
 import java.io.IOException;
 import java.util.ArrayList;
+import java.util.Collection;
 import java.util.List;
 import java.util.Random;
 import java.util.Scanner;
@@ -254,8 +255,12 @@ public class TestForest extends Configur
     }
   }
 
-  private void testDirectory(Path outPath, DataConverter converter, DecisionForest forest,
-    Dataset dataset, List<double[]> results, Random rng) throws IOException {
+  private void testDirectory(Path outPath,
+                             DataConverter converter,
+                             DecisionForest forest,
+                             Dataset dataset,
+                             Collection<double[]> results,
+                             Random rng) throws IOException {
     Path[] infiles = DFUtils.listOutputFiles(dataFS, dataPath);
 
     for (Path path : infiles) {
@@ -265,8 +270,13 @@ public class TestForest extends Configur
     }
   }
 
-  private void testFile(Path inPath, Path outPath, DataConverter converter, DecisionForest forest,
-    Dataset dataset, List<double[]> results, Random rng) throws IOException {
+  private void testFile(Path inPath,
+                        Path outPath,
+                        DataConverter converter,
+                        DecisionForest forest,
+                        Dataset dataset,
+                        Collection<double[]> results,
+                        Random rng) throws IOException {
     // create the predictions file
     FSDataOutputStream ofile = null;
 
@@ -276,7 +286,7 @@ public class TestForest extends Configur
 
     FSDataInputStream input = dataFS.open(inPath);
     try {
-      Scanner scanner = new Scanner(input);
+      Scanner scanner = new Scanner(input, "UTF-8");
 
       while (scanner.hasNextLine()) {
         String line = scanner.nextLine();

Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/PosTagger.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/PosTagger.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/PosTagger.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/PosTagger.java Wed Jun 20 12:07:50 2012
@@ -154,12 +154,9 @@ public final class PosTagger {
       // determine the IDs
       Integer wordID = wordIDs.get(tags[0]);
       Integer tagID = tagIDs.get(tags[1]);
-      // handle unknown values
-      wordID = wordID == null ? 0 : wordID;
-      tagID = tagID == null ? 0 : tagID;
       // now construct the current sequence
-      observedSequence.add(wordID);
-      hiddenSequence.add(tagID);
+      observedSequence.add(wordID == null ? 0 : wordID);
+      hiddenSequence.add(tagID == null ? 0 : tagID);
     }
 
     // if there is still something in the pipe, register it

Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/AdaptiveLogisticModelParameters.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/AdaptiveLogisticModelParameters.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/AdaptiveLogisticModelParameters.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/AdaptiveLogisticModelParameters.java Wed Jun 20 12:07:50 2012
@@ -62,11 +62,9 @@ public class AdaptiveLogisticModelParame
 
   public void checkParameters() {
     if (prior != null) {
-      if ("TP".equals(prior.toUpperCase(Locale.ENGLISH).trim()) ||
-          "EBP".equals(prior.toUpperCase(Locale.ENGLISH).trim())) {
-        if (Double.isNaN(priorOption)) {
-          throw new IllegalArgumentException("You must specify a double value for TPrior and ElasticBandPrior.");
-        }
+      String priorUppercase = prior.toUpperCase(Locale.ENGLISH).trim();
+      if (("TP".equals(priorUppercase) || "EBP".equals(priorUppercase)) && Double.isNaN(priorOption)) {
+        throw new IllegalArgumentException("You must specify a double value for TPrior and ElasticBandPrior.");
       }
     }
   }

Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/RunAdaptiveLogistic.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/RunAdaptiveLogistic.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/RunAdaptiveLogistic.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/RunAdaptiveLogistic.java Wed Jun 20 12:07:50 2012
@@ -17,6 +17,7 @@
 
 package org.apache.mahout.classifier.sgd;
 
+import com.google.common.base.Charsets;
 import org.apache.commons.cli2.CommandLine;
 import org.apache.commons.cli2.Group;
 import org.apache.commons.cli2.Option;
@@ -32,10 +33,10 @@ import org.apache.mahout.math.Vector;
 import java.io.BufferedReader;
 import java.io.BufferedWriter;
 import java.io.File;
-import java.io.FileWriter;
+import java.io.FileOutputStream;
+import java.io.OutputStreamWriter;
 import java.io.PrintWriter;
 import java.util.HashMap;
-import java.util.Locale;
 import java.util.Map;
 
 public final class RunAdaptiveLogistic {
@@ -50,7 +51,7 @@ public final class RunAdaptiveLogistic {
   }
 
   public static void main(String[] args) throws Exception {
-    mainToOutput(args, new PrintWriter(System.out, true));
+    mainToOutput(args, new PrintWriter(new OutputStreamWriter(System.out, Charsets.UTF_8), true));
   }
 
   static void mainToOutput(String[] args, PrintWriter output) throws Exception {
@@ -67,14 +68,13 @@ public final class RunAdaptiveLogistic {
 
     State<Wrapper, CrossFoldLearner> best = lr.getBest();
     if (best == null) {
-      output.printf("%s\n",
-          "AdaptiveLogisticRegression has not be trained probably.");
+      output.println("AdaptiveLogisticRegression has not be trained probably.");
       return;
     }
     CrossFoldLearner learner = best.getPayload().getLearner();
 
     BufferedReader in = TrainAdaptiveLogistic.open(inputFile);
-    BufferedWriter out = new BufferedWriter(new FileWriter(outputFile));
+    BufferedWriter out = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(outputFile), Charsets.UTF_8));
 
     out.write(idColumn + ",target,score");
     out.newLine();
@@ -104,13 +104,13 @@ public final class RunAdaptiveLogistic {
       }
       k++;
       if (k % 100 == 0) {
-        output.printf(Locale.ENGLISH, "%d records processed \n", k);
+        output.println(k + " records processed");
       }
       line = in.readLine();
     }
     out.flush();
     out.close();
-    output.printf(Locale.ENGLISH, "%d records processed totally.\n", k);
+    output.println(k + " records processed totally.");
   }
 
   private static boolean parseArgs(String[] args) {

Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/RunLogistic.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/RunLogistic.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/RunLogistic.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/RunLogistic.java Wed Jun 20 12:07:50 2012
@@ -17,6 +17,7 @@
 
 package org.apache.mahout.classifier.sgd;
 
+import com.google.common.base.Charsets;
 import org.apache.commons.cli2.CommandLine;
 import org.apache.commons.cli2.Group;
 import org.apache.commons.cli2.Option;
@@ -32,6 +33,7 @@ import org.apache.mahout.classifier.eval
 
 import java.io.BufferedReader;
 import java.io.File;
+import java.io.OutputStreamWriter;
 import java.io.PrintWriter;
 import java.util.Locale;
 
@@ -47,7 +49,7 @@ public final class RunLogistic {
   }
 
   public static void main(String[] args) throws Exception {
-    mainToOutput(args, new PrintWriter(System.out, true));
+    mainToOutput(args, new PrintWriter(new OutputStreamWriter(System.out, Charsets.UTF_8), true));
   }
 
   static void mainToOutput(String[] args, PrintWriter output) throws Exception {
@@ -67,7 +69,7 @@ public final class RunLogistic {
       csv.firstLine(line);
       line = in.readLine();
       if (showScores) {
-        output.printf(Locale.ENGLISH, "\"%s\",\"%s\",\"%s\"\n", "target", "model-output", "log-likelihood");
+        output.println("\"target\",\"model-output\",\"log-likelihood\"");
       }
       while (line != null) {
         Vector v = new SequentialAccessSparseVector(lmp.getNumFeatures());

Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/SimpleCsvExamples.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/SimpleCsvExamples.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/SimpleCsvExamples.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/SimpleCsvExamples.java Wed Jun 20 12:07:50 2012
@@ -35,8 +35,10 @@ import java.io.BufferedReader;
 import java.io.Closeable;
 import java.io.File;
 import java.io.FileInputStream;
+import java.io.FileOutputStream;
 import java.io.IOException;
 import java.io.InputStream;
+import java.io.OutputStreamWriter;
 import java.io.PrintWriter;
 import java.nio.ByteBuffer;
 import java.util.List;
@@ -78,7 +80,8 @@ public final class SimpleCsvExamples {
     long t0 = System.currentTimeMillis();
     Vector v = new DenseVector(1000);
     if ("--generate".equals(args[0])) {
-      PrintWriter out = new PrintWriter(new File(args[2]));
+      PrintWriter out =
+          new PrintWriter(new OutputStreamWriter(new FileOutputStream(new File(args[2])), Charsets.UTF_8));
       try {
         int n = Integer.parseInt(args[1]);
         for (int i = 0; i < n; i++) {

Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/TestASFEmail.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/TestASFEmail.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/TestASFEmail.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/TestASFEmail.java Wed Jun 20 12:07:50 2012
@@ -17,6 +17,7 @@
 
 package org.apache.mahout.classifier.sgd;
 
+import com.google.common.base.Charsets;
 import org.apache.commons.cli2.CommandLine;
 import org.apache.commons.cli2.Group;
 import org.apache.commons.cli2.Option;
@@ -41,6 +42,7 @@ import org.apache.mahout.vectorizer.enco
 import java.io.File;
 import java.io.FileInputStream;
 import java.io.IOException;
+import java.io.OutputStreamWriter;
 import java.io.PrintWriter;
 
 /**
@@ -57,7 +59,7 @@ public final class TestASFEmail {
   public static void main(String[] args) throws IOException {
     TestASFEmail runner = new TestASFEmail();
     if (runner.parseArgs(args)) {
-      runner.run(new PrintWriter(System.out, true));
+      runner.run(new PrintWriter(new OutputStreamWriter(System.out, Charsets.UTF_8), true));
     }
   }
 
@@ -70,7 +72,6 @@ public final class TestASFEmail {
 
 
     Dictionary asfDictionary = new Dictionary();
-    //<String> overallCounts = HashMultiset.create();
     Configuration conf = new Configuration();
     PathFilter testFilter = new PathFilter() {
       @Override
@@ -88,7 +89,7 @@ public final class TestASFEmail {
       numItems++;
     }
 
-    System.out.printf("%d test files\n", numItems);
+    System.out.println(numItems + " test files");
     ResultAnalyzer ra = new ResultAnalyzer(asfDictionary.values(), "DEFAULT");
     iter = new SequenceFileDirIterator<Text, VectorWritable>(new Path(base.toString()), PathType.LIST, testFilter,
             null, true, conf);
@@ -105,7 +106,7 @@ public final class TestASFEmail {
       ra.addInstance(asfDictionary.values().get(actual), cr);
 
     }
-    output.printf("%s\n\n", ra.toString());
+    output.println(ra);
   }
 
   boolean parseArgs(String[] args) {

Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/TestNewsGroups.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/TestNewsGroups.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/TestNewsGroups.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/TestNewsGroups.java Wed Jun 20 12:07:50 2012
@@ -20,10 +20,12 @@ package org.apache.mahout.classifier.sgd
 import java.io.File;
 import java.io.FileInputStream;
 import java.io.IOException;
+import java.io.OutputStreamWriter;
 import java.io.PrintWriter;
 import java.util.Arrays;
 import java.util.List;
 
+import com.google.common.base.Charsets;
 import org.apache.commons.cli2.CommandLine;
 import org.apache.commons.cli2.Group;
 import org.apache.commons.cli2.Option;
@@ -56,7 +58,7 @@ public final class TestNewsGroups {
   public static void main(String[] args) throws IOException {
     TestNewsGroups runner = new TestNewsGroups();
     if (runner.parseArgs(args)) {
-      runner.run(new PrintWriter(System.out, true));
+      runner.run(new PrintWriter(new OutputStreamWriter(System.out, Charsets.UTF_8), true));
     }
   }
 
@@ -64,8 +66,8 @@ public final class TestNewsGroups {
 
     File base = new File(inputFile);
     //contains the best model
-    OnlineLogisticRegression classifier = ModelSerializer.readBinary(new FileInputStream(modelFile), OnlineLogisticRegression.class);
-
+    OnlineLogisticRegression classifier =
+        ModelSerializer.readBinary(new FileInputStream(modelFile), OnlineLogisticRegression.class);
 
     Dictionary newsGroups = new Dictionary();
     Multiset<String> overallCounts = HashMultiset.create();
@@ -77,7 +79,7 @@ public final class TestNewsGroups {
         files.addAll(Arrays.asList(newsgroup.listFiles()));
       }
     }
-    System.out.printf("%d test files\n", files.size());
+    System.out.println(files.size() + " test files");
     ResultAnalyzer ra = new ResultAnalyzer(newsGroups.values(), "DEFAULT");
     for (File file : files) {
       String ng = file.getParentFile().getName();
@@ -93,7 +95,7 @@ public final class TestNewsGroups {
       ra.addInstance(newsGroups.values().get(actual), cr);
 
     }
-    output.printf("%s\n\n", ra.toString());
+    output.println(ra);
   }
 
   boolean parseArgs(String[] args) {

Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/TrainASFEmail.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/TrainASFEmail.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/TrainASFEmail.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/TrainASFEmail.java Wed Jun 20 12:07:50 2012
@@ -35,26 +35,23 @@ import org.apache.mahout.math.VectorWrit
 import org.apache.mahout.vectorizer.encoders.Dictionary;
 
 import java.io.File;
-import java.io.IOException;
 import java.util.Collections;
 import java.util.List;
 
 public final class TrainASFEmail extends AbstractJob {
 
-  //private static final String[] LEAK_LABELS = {"none", "month-year", "day-month-year"};
-
   private TrainASFEmail() {
   }
 
   @Override
   public int run(String[] args) throws Exception {
-    int result = 0;
     addInputOption();
     addOutputOption();
     addOption("categories", "nc", "The number of categories to train on", true);
     addOption("cardinality", "c", "The size of the vectors to use", "100000");
     addOption("threads", "t", "The number of threads to use in the learner", "20");
-    addOption("poolSize", "p", "The number of CrossFoldLearners to use in the AdaptiveLogisticRegression.  Higher values require more memory.", "5");
+    addOption("poolSize", "p", "The number of CrossFoldLearners to use in the AdaptiveLogisticRegression. "
+                               + "Higher values require more memory.", "5");
     if (parseArguments(args) == null) {
       return -1;
     }
@@ -69,7 +66,8 @@ public final class TrainASFEmail extends
     int threadCount = Integer.parseInt(getOption("threads", "20"));
     int poolSize = Integer.parseInt(getOption("poolSize", "5"));
     Dictionary asfDictionary = new Dictionary();
-    AdaptiveLogisticRegression learningAlgorithm = new AdaptiveLogisticRegression(numCats, cardinality, new L1(), threadCount, poolSize);
+    AdaptiveLogisticRegression learningAlgorithm =
+        new AdaptiveLogisticRegression(numCats, cardinality, new L1(), threadCount, poolSize);
     learningAlgorithm.setInterval(800);
     learningAlgorithm.setAveragingWindow(500);
 
@@ -81,8 +79,13 @@ public final class TrainASFEmail extends
         return path.getName().contains("training");
       }
     };
-    SequenceFileDirIterator<Text, VectorWritable> iter = new SequenceFileDirIterator<Text, VectorWritable>(new Path(base.toString()), PathType.LIST, trainFilter,
-            null, true, conf);
+    SequenceFileDirIterator<Text, VectorWritable> iter =
+        new SequenceFileDirIterator<Text, VectorWritable>(new Path(base.toString()),
+                                                          PathType.LIST,
+                                                          trainFilter,
+                                                          null,
+                                                          true,
+                                                          conf);
     long numItems = 0;
     while (iter.hasNext()) {
       Pair<Text, VectorWritable> next = iter.next();
@@ -90,7 +93,7 @@ public final class TrainASFEmail extends
       numItems++;
     }
 
-    System.out.printf("%d training files\n", numItems);
+    System.out.println(numItems + " training files");
 
 
     SGDInfo info = new SGDInfo();
@@ -118,20 +121,20 @@ public final class TrainASFEmail extends
             learningAlgorithm.getBest().getPayload().getLearner().getModels().get(0));
 
     List<Integer> counts = Lists.newArrayList();
-    System.out.printf("Word counts\n");
+    System.out.println("Word counts");
     for (String count : overallCounts.elementSet()) {
       counts.add(overallCounts.count(count));
     }
     Collections.sort(counts, Ordering.natural().reverse());
     k = 0;
     for (Integer count : counts) {
-      System.out.printf("%d\t%d\n", k, count);
+      System.out.println(k + "\t" + count);
       k++;
       if (k > 1000) {
         break;
       }
     }
-    return result;
+    return 0;
   }
 
   public static void main(String[] args) throws Exception {

Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/TrainAdaptiveLogistic.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/TrainAdaptiveLogistic.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/TrainAdaptiveLogistic.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/TrainAdaptiveLogistic.java Wed Jun 20 12:07:50 2012
@@ -25,6 +25,7 @@ import java.io.IOException;
 import java.io.InputStream;
 import java.io.InputStreamReader;
 import java.io.OutputStream;
+import java.io.OutputStreamWriter;
 import java.io.PrintWriter;
 import java.util.List;
 import java.util.Locale;
@@ -60,7 +61,7 @@ public final class TrainAdaptiveLogistic
   }
 
   public static void main(String[] args) throws Exception {
-    mainToOutput(args, new PrintWriter(System.out, true));
+    mainToOutput(args, new PrintWriter(new OutputStreamWriter(System.out, Charsets.UTF_8), true));
   }
 
   static void mainToOutput(String[] args, PrintWriter output) throws Exception {
@@ -117,8 +118,7 @@ public final class TrainAdaptiveLogistic
         learner = best.getPayload().getLearner();
       }
       if (learner == null) {
-        output.printf(Locale.ENGLISH,
-                      "%s\n", "AdaptiveLogisticRegression has failed to train a model.");
+        output.println("AdaptiveLogisticRegression has failed to train a model.");
         return;
       }
 
@@ -131,8 +131,8 @@ public final class TrainAdaptiveLogistic
       }
 
       OnlineLogisticRegression lr = learner.getModels().get(0);
-      output.printf(Locale.ENGLISH, "%d\n", lmp.getNumFeatures());
-      output.printf(Locale.ENGLISH, "%s ~ ", lmp.getTargetVariable());
+      output.println(lmp.getNumFeatures());
+      output.println(lmp.getTargetVariable() + " ~ ");
       String sep = "";
       for (String v : csv.getTraceDictionary().keySet()) {
         double weight = predictorWeight(lr, 0, csv, v);

Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/TrainLogistic.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/TrainLogistic.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/TrainLogistic.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/TrainLogistic.java Wed Jun 20 12:07:50 2012
@@ -40,6 +40,7 @@ import java.io.IOException;
 import java.io.InputStream;
 import java.io.InputStreamReader;
 import java.io.OutputStream;
+import java.io.OutputStreamWriter;
 import java.io.PrintWriter;
 import java.util.List;
 import java.util.Locale;
@@ -61,7 +62,7 @@ public final class TrainLogistic {
   }
 
   public static void main(String[] args) throws Exception {
-    mainToOutput(args, new PrintWriter(System.out, true));
+    mainToOutput(args, new PrintWriter(new OutputStreamWriter(System.out, Charsets.UTF_8), true));
   }
 
   static void mainToOutput(String[] args, PrintWriter output) throws Exception {
@@ -78,7 +79,6 @@ public final class TrainLogistic {
           csv.firstLine(in.readLine());
 
           String line = in.readLine();
-          int lineCount = 0;
           while (line != null) {
             // for each new line, get target and predictors
             Vector input = new RandomAccessSparseVector(lmp.getNumFeatures());
@@ -104,7 +104,6 @@ public final class TrainLogistic {
             lr.train(targetValue, input);
 
             line = in.readLine();
-            lineCount++;
           }
         } finally {
           Closeables.closeQuietly(in);
@@ -118,8 +117,8 @@ public final class TrainLogistic {
         Closeables.closeQuietly(modelOutput);
       }
 
-      output.printf(Locale.ENGLISH, "%d\n", lmp.getNumFeatures());
-      output.printf(Locale.ENGLISH, "%s ~ ", lmp.getTargetVariable());
+      output.println(lmp.getNumFeatures());
+      output.println(lmp.getTargetVariable() + " ~ ");
       String sep = "";
       for (String v : csv.getTraceDictionary().keySet()) {
         double weight = predictorWeight(lr, 0, csv, v);

Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/TrainNewsGroups.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/TrainNewsGroups.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/TrainNewsGroups.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/TrainNewsGroups.java Wed Jun 20 12:07:50 2012
@@ -97,7 +97,8 @@ public final class TrainNewsGroups {
 
     NewsgroupHelper helper = new NewsgroupHelper();
     helper.getEncoder().setProbes(2);
-    AdaptiveLogisticRegression learningAlgorithm = new AdaptiveLogisticRegression(20, NewsgroupHelper.FEATURES, new L1());
+    AdaptiveLogisticRegression learningAlgorithm =
+        new AdaptiveLogisticRegression(20, NewsgroupHelper.FEATURES, new L1());
     learningAlgorithm.setInterval(800);
     learningAlgorithm.setAveragingWindow(500);
 
@@ -109,7 +110,7 @@ public final class TrainNewsGroups {
       }
     }
     Collections.shuffle(files);
-    System.out.printf("%d training files\n", files.size());
+    System.out.println(files.size() + " training files");
     SGDInfo info = new SGDInfo();
 
     int k = 0;
@@ -135,14 +136,14 @@ public final class TrainNewsGroups {
             learningAlgorithm.getBest().getPayload().getLearner().getModels().get(0));
 
     List<Integer> counts = Lists.newArrayList();
-    System.out.printf("Word counts\n");
+    System.out.println("Word counts");
     for (String count : overallCounts.elementSet()) {
       counts.add(overallCounts.count(count));
     }
     Collections.sort(counts, Ordering.natural().reverse());
     k = 0;
     for (Integer count : counts) {
-      System.out.printf("%d\t%d\n", k, count);
+      System.out.println(k + "\t" + count);
       k++;
       if (k > 1000) {
         break;

Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/ValidateAdaptiveLogistic.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/ValidateAdaptiveLogistic.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/ValidateAdaptiveLogistic.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/ValidateAdaptiveLogistic.java Wed Jun 20 12:07:50 2012
@@ -20,9 +20,11 @@ package org.apache.mahout.classifier.sgd
 import java.io.BufferedReader;
 import java.io.File;
 import java.io.IOException;
+import java.io.OutputStreamWriter;
 import java.io.PrintWriter;
 import java.util.Locale;
 
+import com.google.common.base.Charsets;
 import org.apache.commons.cli2.CommandLine;
 import org.apache.commons.cli2.Group;
 import org.apache.commons.cli2.Option;
@@ -58,7 +60,7 @@ public final class ValidateAdaptiveLogis
   }
 
   public static void main(String[] args) throws IOException {
-    mainToOutput(args, new PrintWriter(System.out, true));
+    mainToOutput(args, new PrintWriter(new OutputStreamWriter(System.out, Charsets.UTF_8), true));
   }
 
   static void mainToOutput(String[] args, PrintWriter output) throws IOException {
@@ -83,8 +85,7 @@ public final class ValidateAdaptiveLogis
 
       State<Wrapper, CrossFoldLearner> best = lr.getBest();
       if (best == null) {
-        output.printf("%s\n",
-            "AdaptiveLogisticRegression has not be trained probably.");
+        output.println("AdaptiveLogisticRegression has not be trained probably.");
         return;
       }
       CrossFoldLearner learner = best.getPayload().getLearner();
@@ -94,8 +95,7 @@ public final class ValidateAdaptiveLogis
       csv.firstLine(line);
       line = in.readLine();
       if (showScores) {
-        output.printf(Locale.ENGLISH, "\"%s\", \"%s\", \"%s\", \"%s\"\n",
-            "target", "model-output", "log-likelihood", "average-likelihood");
+        output.println("\"target\", \"model-output\", \"log-likelihood\", \"average-likelihood\"");
       }
       while (line != null) {
         Vector v = new SequentialAccessSparseVector(lmp.getNumFeatures());

Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayDirichlet.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayDirichlet.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayDirichlet.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayDirichlet.java Wed Jun 20 12:07:50 2012
@@ -91,7 +91,7 @@ public class DisplayDirichlet extends Di
     Path priorPath = new Path(output, Cluster.INITIAL_CLUSTERS_DIR);
     prior.writeToSeqFiles(priorPath);
     Configuration conf = new Configuration();
-    new ClusterIterator().iterateSeq(conf, input, priorPath, output, numIterations);
+    ClusterIterator.iterateSeq(conf, input, priorPath, output, numIterations);
   }
   
   private static void runSequentialDirichletClusterer(Path input, Path output,

Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayFuzzyKMeans.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayFuzzyKMeans.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayFuzzyKMeans.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayFuzzyKMeans.java Wed Jun 20 12:07:50 2012
@@ -93,7 +93,7 @@ public class DisplayFuzzyKMeans extends 
     Path priorPath = new Path(output, "classifier-0");
     prior.writeToSeqFiles(priorPath);
     
-    new ClusterIterator().iterateSeq(conf, samples, priorPath, output, maxIterations);
+    ClusterIterator.iterateSeq(conf, samples, priorPath, output, maxIterations);
     loadClustersWritable(output);
   }
   

Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayKMeans.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayKMeans.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayKMeans.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayKMeans.java Wed Jun 20 12:07:50 2012
@@ -85,7 +85,7 @@ public class DisplayKMeans extends Displ
     prior.writeToSeqFiles(priorPath);
     
     int maxIter = 10;
-    new ClusterIterator().iterateSeq(conf, samples, priorPath, output, maxIter);
+    ClusterIterator.iterateSeq(conf, samples, priorPath, output, maxIter);
     loadClustersWritable(output);
   }
   

Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayMinHash.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayMinHash.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayMinHash.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayMinHash.java Wed Jun 20 12:07:50 2012
@@ -92,17 +92,16 @@ public class DisplayMinHash extends Disp
     LINES, POINTS, SYMBOLS
   }
 
-  private static final long serialVersionUID = 1L;
-  private transient static Logger log = LoggerFactory
-      .getLogger(DisplayMinHash.class);
+  private static final Logger log = LoggerFactory.getLogger(DisplayMinHash.class);
 
-  private static Map<String, List<Vector>> clusters = new HashMap<String, List<Vector>>();
+  private static final int SYMBOLS_FONT_SIZE = 6;
+
+  private static final Map<String, List<Vector>> clusters = new HashMap<String, List<Vector>>();
   private static Iterator<Entry<String, List<Vector>>> currentCluster;
   private static List<Vector> currentClusterPoints;
   private static int updatePeriodTime;
   private static long lastUpdateTime = 0;
   private static boolean isSlideShowOnHold = false;
-  private static int symbolsFontSize = 6;
 
   private PlotType plotType = PlotType.POINTS;
 
@@ -138,25 +137,23 @@ public class DisplayMinHash extends Disp
   private static void plotClusters(Graphics2D g2, PlotType plotType) {
     double sx = (double) res / DS;
     g2.setTransform(AffineTransform.getScaleInstance(sx, sx));
-    Font f = new Font("Dialog", Font.PLAIN, symbolsFontSize);
+    Font f = new Font("Dialog", Font.PLAIN, SYMBOLS_FONT_SIZE);
     g2.setFont(f);
     switch (plotType) {
-    case LINES:
-      plotLines(g2);
-      break;
-    case SYMBOLS:
-      plotSymbols(g2);
-      break;
-    case POINTS:
-      plotPoints(g2);
-      break;
-    default:
-      break;
+      case LINES:
+        plotLines(g2);
+        break;
+      case SYMBOLS:
+        plotSymbols(g2);
+        break;
+      case POINTS:
+        plotPoints(g2);
+        break;
     }
   }
 
   private static void plotLines(Graphics2D g2) {
-    Random rand = new Random();
+    Random rand = RandomUtils.getRandom();
     for (Map.Entry<String, List<Vector>> entry : clusters.entrySet()) {
       List<Vector> vecs = entry.getValue();
 
@@ -181,15 +178,15 @@ public class DisplayMinHash extends Disp
 
   private static void plotSymbols(Graphics2D g2) {
     char symbol = 0;
-    Random rand = new Random();
+    Random rand = RandomUtils.getRandom();
     for (Map.Entry<String, List<Vector>> entry : clusters.entrySet()) {
       List<Vector> vecs = entry.getValue();
 
       g2.setColor(new Color(rand.nextInt()));
       symbol++;
 
-      for (int i = 0; i < vecs.size(); i++) {
-        plotSymbols(g2, vecs.get(i), symbol);
+      for (Vector vec : vecs) {
+        plotSymbols(g2, vec, symbol);
       }
     }
   }
@@ -200,7 +197,7 @@ public class DisplayMinHash extends Disp
     }
 
     if (System.currentTimeMillis() - lastUpdateTime > updatePeriodTime) {
-      plotSampleData((Graphics2D) g2);
+      plotSampleData(g2);
       currentClusterPoints = currentCluster.next().getValue();
       lastUpdateTime = System.currentTimeMillis();
     }
@@ -208,8 +205,8 @@ public class DisplayMinHash extends Disp
     g2.setColor(Color.RED);
     Vector dv = new DenseVector(2).assign(0.03);
 
-    for (int i = 0; i < currentClusterPoints.size(); i++) {
-      plotRectangle(g2, currentClusterPoints.get(i), dv);
+    for (Vector currentClusterPoint : currentClusterPoints) {
+      plotRectangle(g2, currentClusterPoint, dv);
     }
   }
 
@@ -238,8 +235,7 @@ public class DisplayMinHash extends Disp
    * The entry point to the program.
    * 
    * @param args
-   *          The command-line arguments. See {@link DisplayMinHash} for
-   *          details.
+   *          The command-line arguments.
    * 
    * @throws Exception
    *           Thrown if an error occurs during the execution.
@@ -264,6 +260,7 @@ public class DisplayMinHash extends Disp
 
     if (type == PlotType.POINTS) {
       Timer timer = new Timer(updatePeriodTime, new ActionListener() {
+        @Override
         public void actionPerformed(ActionEvent e) {
           repaint(f);
         }
@@ -293,30 +290,28 @@ public class DisplayMinHash extends Disp
   private static PlotType determinePlotType(String[] args) {
     PlotType type = PlotType.POINTS;
     if (args.length != 0) {
-      if (args[0].equals("-p")) {
+      if ("-p".equals(args[0])) {
         type = PlotType.POINTS;
-      } else if (args[0].equals("-l")) {
+      } else if ("-l".equals(args[0])) {
         type = PlotType.LINES;
-      } else if (args[0].equals("-s")) {
+      } else if ("-s".equals(args[0])) {
         type = PlotType.SYMBOLS;
       } else {
-        System.out
-            .println("Wrong parameter: -p (plot points); -l (plot lines); -s (plot symbols)");
+        System.out.println("Wrong parameter: -p (plot points); -l (plot lines); -s (plot symbols)");
       }
     }
     return type;
   }
 
   private static int determineUpdatePeriodTime(String[] args) {
-    int updatePeriodTimeInMinutes = 1;
     if (args.length >= 2) {
       try {
         updatePeriodTime = Integer.parseInt(args[1]);
-      } catch (Exception e) {
-        System.out.println(args[1]
-            + " isn't valid integer value. 1 second will be used.");
+      } catch (NumberFormatException nfe) {
+        System.out.println(args[1] + " isn't valid integer value. 1 second will be used.");
       }
     }
+    int updatePeriodTimeInMinutes = 1;
     return updatePeriodTimeInMinutes * 1000;
   }
 
@@ -334,15 +329,16 @@ public class DisplayMinHash extends Disp
   private static void logClusters() {
     int i = 0;
     for (Map.Entry<String, List<Vector>> entry : clusters.entrySet()) {
-      String logStr = "Cluster N:" + ++i + ": ";
+      StringBuilder logStr = new StringBuilder();
+      logStr.append("Cluster N:").append(++i).append(": ");
       List<Vector> vecs = entry.getValue();
       for (Vector vector : vecs) {
-        logStr += vector.get(0);
-        logStr += ",";
-        logStr += vector.get(1);
-        logStr += "; ";
+        logStr.append(vector.get(0));
+        logStr.append(',');
+        logStr.append(vector.get(1));
+        logStr.append("; ");
       }
-      log.info(logStr);
+      log.info(logStr.toString());
     }
   }
 
@@ -365,9 +361,7 @@ public class DisplayMinHash extends Disp
 
   private static void runMinHash(Configuration conf, Path samples, Path output)
       throws Exception {
-    MinHashDriver mhd = new MinHashDriver();
-
-    ToolRunner.run(conf, mhd, new String[] { "--input", samples.toString(),
+    ToolRunner.run(conf, new MinHashDriver(), new String[] { "--input", samples.toString(),
         "--hashType", HashFactory.HashType.MURMUR3.toString(), "--output",
         output.toString(), "--minVectorSize", "1", "--debugOutput"
 

Modified: mahout/trunk/integration/pom.xml
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/pom.xml?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/integration/pom.xml (original)
+++ mahout/trunk/integration/pom.xml Wed Jun 20 12:07:50 2012
@@ -40,6 +40,7 @@
       <plugin>
         <groupId>org.apache.maven.plugins</groupId>
         <artifactId>maven-compiler-plugin</artifactId>
+        <version>2.4</version>
         <configuration>
           <encoding>UTF-8</encoding>
           <source>1.6</source>
@@ -50,6 +51,7 @@
       <plugin>
         <groupId>org.apache.maven.plugins</groupId>
         <artifactId>maven-dependency-plugin</artifactId>
+        <version>2.4</version>
         <executions>
           <execution>
             <id>copy-dependencies</id>
@@ -66,6 +68,7 @@
       <plugin>
         <groupId>org.apache.maven.plugins</groupId>
         <artifactId>maven-remote-resources-plugin</artifactId>
+        <version>1.3</version>
         <configuration>
           <appendedResourcesDirectory>../src/main/appended-resources</appendedResourcesDirectory>
           <resourceBundles>

Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/classifier/ConfusionMatrixDumper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/classifier/ConfusionMatrixDumper.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/classifier/ConfusionMatrixDumper.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/classifier/ConfusionMatrixDumper.java Wed Jun 20 12:07:50 2012
@@ -48,7 +48,9 @@ import com.google.common.collect.Lists;
  * Intended to consume ConfusionMatrix SequenceFile output by Bayes TestClassifier class
  */
 public final class ConfusionMatrixDumper extends AbstractJob {
-  
+
+  private static final String TAB_SEPARATOR = "|";
+
   // HTML wrapper - default CSS
   private static final String HEADER = "<html>"
                                        + "<head>\n"
@@ -162,7 +164,6 @@ public final class ConfusionMatrixDumper
   }
   
   private static void exportText(Path inputPath, PrintStream out) throws IOException {
-    String TAB_SEPARATOR = "|";
     MatrixWritable mw = new MatrixWritable();
     Text key = new Text();
     readSeqFile(inputPath, key, mw);

Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/clustering/evaluation/RepresentativePointsDriver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/clustering/evaluation/RepresentativePointsDriver.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/clustering/evaluation/RepresentativePointsDriver.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/clustering/evaluation/RepresentativePointsDriver.java Wed Jun 20 12:07:50 2012
@@ -101,7 +101,7 @@ public final class RepresentativePointsD
    * @throws IOException
    *           if errors occur
    */
-  public static void printRepresentativePoints(Path output, int numIterations) throws IOException {
+  public static void printRepresentativePoints(Path output, int numIterations) {
     for (int i = 0; i <= numIterations; i++) {
       Path out = new Path(output, "representativePoints-" + i);
       System.out.println("Representative Points for iteration " + i);

Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/text/MailArchivesClusteringAnalyzer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/text/MailArchivesClusteringAnalyzer.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/text/MailArchivesClusteringAnalyzer.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/text/MailArchivesClusteringAnalyzer.java Wed Jun 20 12:07:50 2012
@@ -105,7 +105,7 @@ public final class MailArchivesClusterin
 
   // Regex used to exclude non-alpha-numeric tokens
   private static final Pattern alphaNumeric = Pattern.compile("^[a-z][a-z0-9_]+$");
-  private final static Matcher matcher = alphaNumeric.matcher("");
+  private static final Matcher matcher = alphaNumeric.matcher("");
 
   public MailArchivesClusteringAnalyzer() {
     super(LUCENE_VERSION, STOP_WORDS);

Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromMailArchives.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromMailArchives.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromMailArchives.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromMailArchives.java Wed Jun 20 12:07:50 2012
@@ -20,7 +20,6 @@ import com.google.common.io.Closeables;
 
 import org.apache.commons.cli2.builder.ArgumentBuilder;
 import org.apache.commons.cli2.builder.DefaultOptionBuilder;
-import org.apache.commons.cli2.builder.GroupBuilder;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.io.SequenceFile;
@@ -124,7 +123,7 @@ public final class SequenceFilesFromMail
   public int run(String[] args) throws Exception {
     DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
     ArgumentBuilder abuilder = new ArgumentBuilder();
-    GroupBuilder gbuilder = new GroupBuilder();
+    //GroupBuilder gbuilder = new GroupBuilder();
 
     addInputOption();
     addOutputOption();

Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/text/wikipedia/WikipediaXmlSplitter.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/text/wikipedia/WikipediaXmlSplitter.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/text/wikipedia/WikipediaXmlSplitter.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/text/wikipedia/WikipediaXmlSplitter.java Wed Jun 20 12:07:50 2012
@@ -47,7 +47,7 @@ import org.slf4j.LoggerFactory;
 
 /**
  * <p>The Bayes example package provides some helper classes for training the Naive Bayes classifier
- * on the Twenty Newsgroups data. See {@link org.apache.mahout.examples.wikipedia.PrepareTwentyNewsgroups}
+ * on the Twenty Newsgroups data. See {@code PrepareTwentyNewsgroups}
  * for details on running the trainer and
  * formatting the Twenty Newsgroups data properly for the training.</p>
  *

Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/utils/MatrixDumper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/utils/MatrixDumper.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/utils/MatrixDumper.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/utils/MatrixDumper.java Wed Jun 20 12:07:50 2012
@@ -98,17 +98,16 @@ public final class MatrixDumper extends 
   }
   
   private static PrintStream getPrintStream(String outputPath) throws IOException {
-    if (outputPath != null) {
-      File outputFile = new File(outputPath);
-      if (outputFile.exists()) {
-        outputFile.delete();
-      }
-      outputFile.createNewFile();
-      OutputStream os = new FileOutputStream(outputFile);
-      return new PrintStream(os);
-    } else {
+    if (outputPath == null) {
       return System.out;
     }
+    File outputFile = new File(outputPath);
+    if (outputFile.exists()) {
+      outputFile.delete();
+    }
+    outputFile.createNewFile();
+    OutputStream os = new FileOutputStream(outputFile);
+    return new PrintStream(os);
   }
   
   /**
@@ -128,10 +127,9 @@ public final class MatrixDumper extends 
   }
   
   private static String[] sortLabels(Map<String,Integer> labels) {
-    String[] sorted = new String[labels.keySet().size()];
-    for (String label: labels.keySet()) {
-      Integer index = labels.get(label);
-      sorted[index] = label;
+    String[] sorted = new String[labels.size()];
+    for (Map.Entry<String,Integer> entry : labels.entrySet()) {
+      sorted[entry.getValue()] = entry.getKey();
     }
     return sorted;
   }

Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/utils/SequenceFileDumper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/utils/SequenceFileDumper.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/utils/SequenceFileDumper.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/utils/SequenceFileDumper.java Wed Jun 20 12:07:50 2012
@@ -30,7 +30,6 @@ import org.apache.hadoop.io.Writable;
 import org.apache.hadoop.mapred.Utils.OutputFileUtils.OutputFilesFilter;
 import org.apache.mahout.common.AbstractJob;
 import org.apache.mahout.common.Pair;
-import org.apache.mahout.common.commandline.DefaultOptionCreator;
 import org.apache.mahout.common.iterator.sequencefile.SequenceFileIterator;
 import org.apache.mahout.math.list.IntArrayList;
 import org.apache.mahout.math.map.OpenObjectIntHashMap;
@@ -62,7 +61,7 @@ public final class SequenceFileDumper ex
       return -1;
     }
 
-    Path[] pathArr= null;
+    Path[] pathArr;
     Configuration conf = new Configuration();
     Path input = getInputPath();
     FileSystem fs = input.getFileSystem(conf);
@@ -118,8 +117,9 @@ public final class SequenceFileDumper ex
           long numItems = Long.MAX_VALUE;
           if (hasOption("numItems")) {
             numItems = Long.parseLong(getOption("numItems"));
-            if (!hasOption("quiet"))
+            if (!hasOption("quiet")) {
               writer.append("Max Items to dump: ").append(String.valueOf(numItems)).append("\n");
+            }
           }
           while (iterator.hasNext() && count < numItems) {
             Pair<?, ?> record = iterator.next();
@@ -134,8 +134,9 @@ public final class SequenceFileDumper ex
             }
             count++;
           }
-          if (!hasOption("quiet"))
+          if (!hasOption("quiet")) {
             writer.append("Count: ").append(String.valueOf(count)).append('\n');
+          }
         }
         if (facets != null) {
           List<String> keyList = new ArrayList<String>(facets.size());
@@ -166,9 +167,4 @@ public final class SequenceFileDumper ex
     new SequenceFileDumper().run(args);
   }
 
-  private static void printHelp(Group group) {
-    HelpFormatter formatter = new HelpFormatter();
-    formatter.setGroup(group);
-    formatter.print();
-  }
 }