You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by sr...@apache.org on 2012/06/20 14:07:58 UTC

svn commit: r1352052 [2/7] - in /mahout/trunk: ./ buildtools/ buildtools/src/main/resources/ core/ core/src/main/java/org/apache/mahout/cf/taste/hadoop/ core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/ core/src/main/java/org/apache/mahout/cf/t...

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/df/tools/TreeVisualizer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/df/tools/TreeVisualizer.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/df/tools/TreeVisualizer.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/df/tools/TreeVisualizer.java Wed Jun 20 12:07:50 2012
@@ -44,88 +44,102 @@ public final class TreeVisualizer {
     return df.format(value);
   }
 
-  private static String toStringNode(Node node, Dataset dataset, String[] attrNames,
-    Map<String, Field> fields, int layer) throws IllegalAccessException {
+  private static String toStringNode(Node node,
+                                     Dataset dataset,
+                                     String[] attrNames,
+                                     Map<String, Field> fields,
+                                     int layer) {
+
     StringBuilder buff = new StringBuilder();
 
-    if (node instanceof CategoricalNode) {
-      CategoricalNode cnode = (CategoricalNode) node;
-      int attr = (Integer) fields.get("CategoricalNode.attr").get(cnode);
-      double[] values = (double[]) fields.get("CategoricalNode.values").get(cnode);
-      Node[] childs = (Node[]) fields.get("CategoricalNode.childs").get(cnode);
-      String[][] attrValues = (String[][]) fields.get("Dataset.values").get(dataset);
-      for (int i = 0; i < childs.length; i++) {
+    try {
+
+      if (node instanceof CategoricalNode) {
+        CategoricalNode cnode = (CategoricalNode) node;
+        int attr = (Integer) fields.get("CategoricalNode.attr").get(cnode);
+        double[] values = (double[]) fields.get("CategoricalNode.values").get(cnode);
+        Node[] childs = (Node[]) fields.get("CategoricalNode.childs").get(cnode);
+        String[][] attrValues = (String[][]) fields.get("Dataset.values").get(dataset);
+        for (int i = 0; i < childs.length; i++) {
+          buff.append('\n');
+          for (int j = 0; j < layer; j++) {
+            buff.append("|   ");
+          }
+          buff.append(attrNames == null ? attr : attrNames[attr]).append(" = ").append(attrValues[attr][i]);
+          int index = ArrayUtils.indexOf(values, i);
+          if (index >= 0) {
+            buff.append(toStringNode(childs[index], dataset, attrNames, fields, layer + 1));
+          }
+        }
+      } else if (node instanceof NumericalNode) {
+        NumericalNode nnode = (NumericalNode) node;
+        int attr = (Integer) fields.get("NumericalNode.attr").get(nnode);
+        double split = (Double) fields.get("NumericalNode.split").get(nnode);
+        Node loChild = (Node) fields.get("NumericalNode.loChild").get(nnode);
+        Node hiChild = (Node) fields.get("NumericalNode.hiChild").get(nnode);
         buff.append('\n');
         for (int j = 0; j < layer; j++) {
           buff.append("|   ");
         }
-        buff.append((attrNames == null ? attr : attrNames[attr]) + " = " + attrValues[attr][i]);
-        int index = ArrayUtils.indexOf(values, i);
-        if (index >= 0) {
-          buff.append(toStringNode(childs[index], dataset, attrNames, fields, layer + 1));
+        buff.append(attrNames == null ? attr : attrNames[attr]).append(" < ").append(doubleToString(split));
+        buff.append(toStringNode(loChild, dataset, attrNames, fields, layer + 1));
+        buff.append('\n');
+        for (int j = 0; j < layer; j++) {
+          buff.append("|   ");
+        }
+        buff.append(attrNames == null ? attr : attrNames[attr]).append(" >= ").append(doubleToString(split));
+        buff.append(toStringNode(hiChild, dataset, attrNames, fields, layer + 1));
+      } else if (node instanceof Leaf) {
+        Leaf leaf = (Leaf) node;
+        double label = (Double) fields.get("Leaf.label").get(leaf);
+        if (dataset.isNumerical(dataset.getLabelId())) {
+          buff.append(" : ").append(doubleToString(label));
+        } else {
+          buff.append(" : ").append(dataset.getLabelString((int) label));
         }
       }
-    } else if (node instanceof NumericalNode) {
-      NumericalNode nnode = (NumericalNode) node;
-      int attr = (Integer) fields.get("NumericalNode.attr").get(nnode);
-      double split = (Double) fields.get("NumericalNode.split").get(nnode);
-      Node loChild = (Node) fields.get("NumericalNode.loChild").get(nnode);
-      Node hiChild = (Node) fields.get("NumericalNode.hiChild").get(nnode);
-      buff.append('\n');
-      for (int j = 0; j < layer; j++) {
-        buff.append("|   ");
-      }
-      buff.append((attrNames == null ? attr : attrNames[attr]) + " < " + doubleToString(split));
-      buff.append(toStringNode(loChild, dataset, attrNames, fields, layer + 1));
-      buff.append('\n');
-      for (int j = 0; j < layer; j++) {
-        buff.append("|   ");
-      }
-      buff.append((attrNames == null ? attr : attrNames[attr]) + " >= " + doubleToString(split));
-      buff.append(toStringNode(hiChild, dataset, attrNames, fields, layer + 1));
-    } else if (node instanceof Leaf) {
-      Leaf leaf = (Leaf) node;
-      double label = (Double) fields.get("Leaf.label").get(leaf);
-      if (dataset.isNumerical(dataset.getLabelId())) {
-        buff.append(" : ").append(doubleToString(label));
-      } else {
-        buff.append(" : ").append(dataset.getLabelString((int) label));
-      }
+
+    } catch (IllegalAccessException iae) {
+      throw new IllegalStateException(iae);
     }
 
     return buff.toString();
   }
 
-  private static Map<String, Field> getReflectMap() throws Exception {
+  private static Map<String, Field> getReflectMap() {
     Map<String, Field> fields = new HashMap<String, Field>();
 
-    Field m = CategoricalNode.class.getDeclaredField("attr");
-    m.setAccessible(true);
-    fields.put("CategoricalNode.attr", m);
-    m = CategoricalNode.class.getDeclaredField("values");
-    m.setAccessible(true);
-    fields.put("CategoricalNode.values", m);
-    m = CategoricalNode.class.getDeclaredField("childs");
-    m.setAccessible(true);
-    fields.put("CategoricalNode.childs", m);
-    m = NumericalNode.class.getDeclaredField("attr");
-    m.setAccessible(true);
-    fields.put("NumericalNode.attr", m);
-    m = NumericalNode.class.getDeclaredField("split");
-    m.setAccessible(true);
-    fields.put("NumericalNode.split", m);
-    m = NumericalNode.class.getDeclaredField("loChild");
-    m.setAccessible(true);
-    fields.put("NumericalNode.loChild", m);
-    m = NumericalNode.class.getDeclaredField("hiChild");
-    m.setAccessible(true);
-    fields.put("NumericalNode.hiChild", m);
-    m = Leaf.class.getDeclaredField("label");
-    m.setAccessible(true);
-    fields.put("Leaf.label", m);
-    m = Dataset.class.getDeclaredField("values");
-    m.setAccessible(true);
-    fields.put("Dataset.values", m);
+    try {
+      Field m = CategoricalNode.class.getDeclaredField("attr");
+      m.setAccessible(true);
+      fields.put("CategoricalNode.attr", m);
+      m = CategoricalNode.class.getDeclaredField("values");
+      m.setAccessible(true);
+      fields.put("CategoricalNode.values", m);
+      m = CategoricalNode.class.getDeclaredField("childs");
+      m.setAccessible(true);
+      fields.put("CategoricalNode.childs", m);
+      m = NumericalNode.class.getDeclaredField("attr");
+      m.setAccessible(true);
+      fields.put("NumericalNode.attr", m);
+      m = NumericalNode.class.getDeclaredField("split");
+      m.setAccessible(true);
+      fields.put("NumericalNode.split", m);
+      m = NumericalNode.class.getDeclaredField("loChild");
+      m.setAccessible(true);
+      fields.put("NumericalNode.loChild", m);
+      m = NumericalNode.class.getDeclaredField("hiChild");
+      m.setAccessible(true);
+      fields.put("NumericalNode.hiChild", m);
+      m = Leaf.class.getDeclaredField("label");
+      m.setAccessible(true);
+      fields.put("Leaf.label", m);
+      m = Dataset.class.getDeclaredField("values");
+      m.setAccessible(true);
+      fields.put("Dataset.values", m);
+    } catch (NoSuchFieldException nsfe) {
+      throw new IllegalStateException(nsfe);
+    }
     
     return fields;
   }
@@ -134,71 +148,73 @@ public final class TreeVisualizer {
    * Decision tree to String
    * @param tree
    *          Node of tree
-   * @param dataset
    * @param attrNames
    *          attribute names
    */
-  public static String toString(Node tree, Dataset dataset, String[] attrNames)
-    throws Exception {
+  public static String toString(Node tree, Dataset dataset, String[] attrNames) {
     return toStringNode(tree, dataset, attrNames, getReflectMap(), 0);
   }
 
   /**
    * Print Decision tree
-   * @param tree
-   *          Node of tree
-   * @param dataset
-   * @param attrNames
-   *          attribute names
+   * @param tree  Node of tree
+   * @param attrNames attribute names
    */
-  public static void print(Node tree, Dataset dataset, String[] attrNames) throws Exception {
+  public static void print(Node tree, Dataset dataset, String[] attrNames) {
     System.out.println(toString(tree, dataset, attrNames));
   }
 
-  private static String toStringPredict(Node node, Instance instance, Dataset dataset,
-    String[] attrNames, Map<String, Field> fields) throws IllegalAccessException {
+  private static String toStringPredict(Node node,
+                                        Instance instance,
+                                        Dataset dataset,
+                                        String[] attrNames,
+                                        Map<String, Field> fields) {
     StringBuilder buff = new StringBuilder();
 
-    if (node instanceof CategoricalNode) {
-      CategoricalNode cnode = (CategoricalNode) node;
-      int attr = (Integer) fields.get("CategoricalNode.attr").get(cnode);
-      double[] values = (double[]) fields.get("CategoricalNode.values").get(cnode);
-      Node[] childs = (Node[]) fields.get("CategoricalNode.childs").get(cnode);
-      String[][] attrValues = (String[][]) fields.get("Dataset.values").get(dataset);
-
-      int index = ArrayUtils.indexOf(values, instance.get(attr));
-      if (index >= 0) {
-        buff.append((attrNames == null ? attr : attrNames[attr]) + " = "
-          + attrValues[attr][(int) instance.get(attr)]);
-        buff.append(" -> ");
-        buff.append(toStringPredict(childs[index], instance, dataset, attrNames, fields));
-      }
-    } else if (node instanceof NumericalNode) {
-      NumericalNode nnode = (NumericalNode) node;
-      int attr = (Integer) fields.get("NumericalNode.attr").get(nnode);
-      double split = (Double) fields.get("NumericalNode.split").get(nnode);
-      Node loChild = (Node) fields.get("NumericalNode.loChild").get(nnode);
-      Node hiChild = (Node) fields.get("NumericalNode.hiChild").get(nnode);
-
-      if (instance.get(attr) < split) {
-        buff.append("(" + (attrNames == null ? attr : attrNames[attr]) + " = "
-          + doubleToString(instance.get(attr)) + ") < " + doubleToString(split));
-        buff.append(" -> ");
-        buff.append(toStringPredict(loChild, instance, dataset, attrNames, fields));
-      } else {
-        buff.append("(" + (attrNames == null ? attr : attrNames[attr]) + " = "
-          + doubleToString(instance.get(attr)) + ") >= " + doubleToString(split));
-        buff.append(" -> ");
-        buff.append(toStringPredict(hiChild, instance, dataset, attrNames, fields));
-      }
-    } else if (node instanceof Leaf) {
-      Leaf leaf = (Leaf) node;
-      double label = (Double) fields.get("Leaf.label").get(leaf);
-      if (dataset.isNumerical(dataset.getLabelId())) {
-        buff.append(doubleToString(label));
-      } else {
-        buff.append(dataset.getLabelString((int) label));
+    try {
+      if (node instanceof CategoricalNode) {
+        CategoricalNode cnode = (CategoricalNode) node;
+        int attr = (Integer) fields.get("CategoricalNode.attr").get(cnode);
+        double[] values = (double[]) fields.get("CategoricalNode.values").get(cnode);
+        Node[] childs = (Node[]) fields.get("CategoricalNode.childs").get(cnode);
+        String[][] attrValues = (String[][]) fields.get("Dataset.values").get(dataset);
+
+        int index = ArrayUtils.indexOf(values, instance.get(attr));
+        if (index >= 0) {
+          buff.append(attrNames == null ? attr : attrNames[attr]).append(" = ")
+              .append(attrValues[attr][(int) instance.get(attr)]);
+          buff.append(" -> ");
+          buff.append(toStringPredict(childs[index], instance, dataset, attrNames, fields));
+        }
+      } else if (node instanceof NumericalNode) {
+        NumericalNode nnode = (NumericalNode) node;
+        int attr = (Integer) fields.get("NumericalNode.attr").get(nnode);
+        double split = (Double) fields.get("NumericalNode.split").get(nnode);
+        Node loChild = (Node) fields.get("NumericalNode.loChild").get(nnode);
+        Node hiChild = (Node) fields.get("NumericalNode.hiChild").get(nnode);
+
+        if (instance.get(attr) < split) {
+          buff.append('(').append(attrNames == null ? attr : attrNames[attr]).append(" = ")
+              .append(doubleToString(instance.get(attr))).append(") < ").append(doubleToString(split));
+          buff.append(" -> ");
+          buff.append(toStringPredict(loChild, instance, dataset, attrNames, fields));
+        } else {
+          buff.append('(').append(attrNames == null ? attr : attrNames[attr]).append(" = ")
+              .append(doubleToString(instance.get(attr))).append(") >= ").append(doubleToString(split));
+          buff.append(" -> ");
+          buff.append(toStringPredict(hiChild, instance, dataset, attrNames, fields));
+        }
+      } else if (node instanceof Leaf) {
+        Leaf leaf = (Leaf) node;
+        double label = (Double) fields.get("Leaf.label").get(leaf);
+        if (dataset.isNumerical(dataset.getLabelId())) {
+          buff.append(doubleToString(label));
+        } else {
+          buff.append(dataset.getLabelString((int) label));
+        }
       }
+    } catch (IllegalAccessException iae) {
+      throw new IllegalStateException(iae);
     }
 
     return buff.toString();
@@ -208,12 +224,10 @@ public final class TreeVisualizer {
    * Predict trace to String
    * @param tree
    *          Node of tree
-   * @param data
    * @param attrNames
    *          attribute names
    */
-  public static String[] predictTrace(Node tree, Data data, String[] attrNames)
-    throws Exception {
+  public static String[] predictTrace(Node tree, Data data, String[] attrNames) {
     Map<String, Field> reflectMap = getReflectMap();
     String[] prediction = new String[data.size()];
     for (int i = 0; i < data.size(); i++) {
@@ -226,16 +240,13 @@ public final class TreeVisualizer {
    * Print predict trace
    * @param tree
    *          Node of tree
-   * @param data
    * @param attrNames
    *          attribute names
    */
-  public static void predictTracePrint(Node tree, Data data, String[] attrNames)
-    throws Exception {
+  public static void predictTracePrint(Node tree, Data data, String[] attrNames) {
     Map<String, Field> reflectMap = getReflectMap();
     for (int i = 0; i < data.size(); i++) {
-      System.out.println(toStringPredict(tree, data.get(i), data.getDataset(), attrNames,
-        reflectMap));
+      System.out.println(toStringPredict(tree, data.get(i), data.getDataset(), attrNames, reflectMap));
     }
   }
 }

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/df/tools/UDistrib.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/df/tools/UDistrib.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/df/tools/UDistrib.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/df/tools/UDistrib.java Wed Jun 20 12:07:50 2012
@@ -159,7 +159,7 @@ public final class UDistrib {
     Path dataPath = new Path(dataStr);
     FileSystem ifs = dataPath.getFileSystem(conf);
     FSDataInputStream input = ifs.open(dataPath);
-    Scanner scanner = new Scanner(input);
+    Scanner scanner = new Scanner(input, "UTF-8");
     DataConverter converter = new DataConverter(dataset);
     int nbInstances = dataset.nbInstances();
     

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/evaluation/Auc.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/evaluation/Auc.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/evaluation/Auc.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/evaluation/Auc.java Wed Jun 20 12:07:50 2012
@@ -37,7 +37,7 @@ import java.util.Random;
 public class Auc {
 
   private int maxBufferSize = 10000;
-  private final DoubleArrayList[] scores = { new DoubleArrayList(), new DoubleArrayList() };
+  private final DoubleArrayList[] scores = {new DoubleArrayList(), new DoubleArrayList()};
   private final Random rand;
   private int samples;
   private final double threshold;

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/BayesUtils.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/BayesUtils.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/BayesUtils.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/BayesUtils.java Wed Jun 20 12:07:50 2012
@@ -46,9 +46,12 @@ import java.util.Collection;
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.Map;
+import java.util.regex.Pattern;
 
 public final class BayesUtils {
 
+  private static final Pattern SLASH = Pattern.compile("/");
+
   private BayesUtils() {}
 
   public static NaiveBayesModel readModelFromDir(Path base, Configuration conf) {
@@ -116,7 +119,7 @@ public final class BayesUtils {
     int i = 0;
     try {
       for (Object label : labels) {
-        String theLabel = ((Pair<?,?>) label).getFirst().toString().split("/")[1];
+        String theLabel = SLASH.split(((Pair<?, ?>) label).getFirst().toString())[1];
         if (!seen.contains(theLabel)) {
           writer.append(new Text(theLabel), new IntWritable(i++));
           seen.add(theLabel);

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/test/BayesTestMapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/test/BayesTestMapper.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/test/BayesTestMapper.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/test/BayesTestMapper.java Wed Jun 20 12:07:50 2012
@@ -30,6 +30,7 @@ import org.apache.mahout.math.Vector;
 import org.apache.mahout.math.VectorWritable;
 
 import java.io.IOException;
+import java.util.regex.Pattern;
 
 /**
  * Run the input through the model and see if it matches.
@@ -38,12 +39,13 @@ import java.io.IOException;
  */
 public class BayesTestMapper extends Mapper<Text, VectorWritable, Text, VectorWritable> {
 
+  private static final Pattern SLASH = Pattern.compile("/");
+
   private AbstractNaiveBayesClassifier classifier;
 
   @Override
   protected void setup(Context context) throws IOException, InterruptedException {
     super.setup(context);
-    System.out.println("Setup");
     Configuration conf = context.getConfiguration();
     Path modelPath = HadoopUtil.cachedFile(conf);
     NaiveBayesModel model = NaiveBayesModel.materialize(modelPath, conf);
@@ -59,6 +61,6 @@ public class BayesTestMapper extends Map
   protected void map(Text key, VectorWritable value, Context context) throws IOException, InterruptedException {
     Vector result = classifier.classifyFull(value.get());
     //the key is the expected value
-    context.write(new Text(key.toString().split("/")[1]), new VectorWritable(result));
+    context.write(new Text(SLASH.split(key.toString())[1]), new VectorWritable(result));
   }
 }

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/test/TestNaiveBayesDriver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/test/TestNaiveBayesDriver.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/test/TestNaiveBayesDriver.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/test/TestNaiveBayesDriver.java Wed Jun 20 12:07:50 2012
@@ -20,6 +20,7 @@ package org.apache.mahout.classifier.nai
 import java.io.IOException;
 import java.util.List;
 import java.util.Map;
+import java.util.regex.Pattern;
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
@@ -60,6 +61,7 @@ public class TestNaiveBayesDriver extend
 
   public static final String LABEL_KEY = "labels";
   public static final String COMPLEMENTARY = "class"; //b for bayes, c for complementary
+  private static final Pattern SLASH = Pattern.compile("/");
 
   public static void main(String[] args) throws Exception {
     ToolRunner.run(new Configuration(), new TestNaiveBayesDriver(), args);
@@ -95,11 +97,11 @@ public class TestNaiveBayesDriver extend
       }
       SequenceFile.Writer writer =
           new SequenceFile.Writer(fs, getConf(), getOutputPath(), Text.class, VectorWritable.class);
-      SequenceFile.Reader reader = new Reader(fs, getInputPath(), getConf());
+      Reader reader = new Reader(fs, getInputPath(), getConf());
       Text key = new Text();
       VectorWritable vw = new VectorWritable();
       while (reader.next(key, vw)) {
-        writer.append(new Text(key.toString().split("/")[1]),
+        writer.append(new Text(SLASH.split(key.toString())[1]),
             new VectorWritable(classifier.classifyFull(vw.get())));
       }
       writer.close();
@@ -137,8 +139,7 @@ public class TestNaiveBayesDriver extend
     //testJob.getConfiguration().set(LABEL_KEY, getOption("--labels"));
     boolean complementary = parsedArgs.containsKey("testComplementary");
     testJob.getConfiguration().set(COMPLEMENTARY, String.valueOf(complementary));
-    boolean succeeded = testJob.waitForCompletion(true);
-    return succeeded;
+    return testJob.waitForCompletion(true);
   }
 
   private static void analyzeResults(Map<Integer, String> labelMap,

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/training/IndexInstancesMapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/training/IndexInstancesMapper.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/training/IndexInstancesMapper.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/training/IndexInstancesMapper.java Wed Jun 20 12:07:50 2012
@@ -18,6 +18,7 @@
 package org.apache.mahout.classifier.naivebayes.training;
 
 import java.io.IOException;
+import java.util.regex.Pattern;
 
 import org.apache.hadoop.io.IntWritable;
 import org.apache.hadoop.io.Text;
@@ -28,6 +29,8 @@ import org.apache.mahout.math.map.OpenOb
 
 public class IndexInstancesMapper extends Mapper<Text, VectorWritable, IntWritable, VectorWritable> {
 
+  private static final Pattern SLASH = Pattern.compile("/");
+
   public enum Counter { SKIPPED_INSTANCES }
 
   private OpenObjectIntHashMap<String> labelIndex;
@@ -40,7 +43,7 @@ public class IndexInstancesMapper extend
 
   @Override
   protected void map(Text labelText, VectorWritable instance, Context ctx) throws IOException, InterruptedException {
-    String label = labelText.toString().split("/")[1]; 
+    String label = SLASH.split(labelText.toString())[1];
     if (labelIndex.containsKey(label)) {
       ctx.write(new IntWritable(labelIndex.get(label)), instance);
     } else {

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/training/TrainNaiveBayesJob.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/training/TrainNaiveBayesJob.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/training/TrainNaiveBayesJob.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/training/TrainNaiveBayesJob.java Wed Jun 20 12:07:50 2012
@@ -30,6 +30,7 @@ import org.apache.mahout.classifier.naiv
 import org.apache.mahout.classifier.naivebayes.NaiveBayesModel;
 import org.apache.mahout.common.AbstractJob;
 import org.apache.mahout.common.HadoopUtil;
+import org.apache.mahout.common.Pair;
 import org.apache.mahout.common.commandline.DefaultOptionCreator;
 import org.apache.mahout.common.iterator.sequencefile.PathFilters;
 import org.apache.mahout.common.iterator.sequencefile.PathType;
@@ -98,18 +99,32 @@ public final class TrainNaiveBayesJob ex
     HadoopUtil.cacheFiles(labPath, getConf());
 
     //add up all the vectors with the same labels, while mapping the labels into our index
-    Job indexInstances = prepareJob(getInputPath(), getTempPath(SUMMED_OBSERVATIONS), SequenceFileInputFormat.class,
-            IndexInstancesMapper.class, IntWritable.class, VectorWritable.class, VectorSumReducer.class, IntWritable.class,
-            VectorWritable.class, SequenceFileOutputFormat.class);
+    Job indexInstances = prepareJob(getInputPath(),
+                                    getTempPath(SUMMED_OBSERVATIONS),
+                                    SequenceFileInputFormat.class,
+                                    IndexInstancesMapper.class,
+                                    IntWritable.class,
+                                    VectorWritable.class,
+                                    VectorSumReducer.class,
+                                    IntWritable.class,
+                                    VectorWritable.class,
+                                    SequenceFileOutputFormat.class);
     indexInstances.setCombinerClass(VectorSumReducer.class);
     boolean succeeded = indexInstances.waitForCompletion(true);
     if (!succeeded) {
       return -1;
     }
     //sum up all the weights from the previous step, per label and per feature
-    Job weightSummer = prepareJob(getTempPath(SUMMED_OBSERVATIONS), getTempPath(WEIGHTS),
-            SequenceFileInputFormat.class, WeightsMapper.class, Text.class, VectorWritable.class, VectorSumReducer.class,
-            Text.class, VectorWritable.class, SequenceFileOutputFormat.class);
+    Job weightSummer = prepareJob(getTempPath(SUMMED_OBSERVATIONS),
+                                  getTempPath(WEIGHTS),
+                                  SequenceFileInputFormat.class,
+                                  WeightsMapper.class,
+                                  Text.class,
+                                  VectorWritable.class,
+                                  VectorSumReducer.class,
+                                  Text.class,
+                                  VectorWritable.class,
+                                  SequenceFileOutputFormat.class);
     weightSummer.getConfiguration().set(WeightsMapper.NUM_LABELS, String.valueOf(labelSize));
     weightSummer.setCombinerClass(VectorSumReducer.class);
     succeeded = weightSummer.waitForCompletion(true);
@@ -120,10 +135,18 @@ public final class TrainNaiveBayesJob ex
     //put the per label and per feature vectors into the cache
     HadoopUtil.cacheFiles(getTempPath(WEIGHTS), getConf());
     
-    //calculate the Thetas, write out to LABEL_THETA_NORMALIZER vectors -- TODO: add reference here to the part of the Rennie paper that discusses this
-    Job thetaSummer = prepareJob(getTempPath(SUMMED_OBSERVATIONS), getTempPath(THETAS),
-            SequenceFileInputFormat.class, ThetaMapper.class, Text.class, VectorWritable.class, VectorSumReducer.class,
-            Text.class, VectorWritable.class, SequenceFileOutputFormat.class);
+    //calculate the Thetas, write out to LABEL_THETA_NORMALIZER vectors --
+    // TODO: add reference here to the part of the Rennie paper that discusses this
+    Job thetaSummer = prepareJob(getTempPath(SUMMED_OBSERVATIONS),
+                                 getTempPath(THETAS),
+                                 SequenceFileInputFormat.class,
+                                 ThetaMapper.class,
+                                 Text.class,
+                                 VectorWritable.class,
+                                 VectorSumReducer.class,
+                                 Text.class,
+                                 VectorWritable.class,
+                                 SequenceFileOutputFormat.class);
     thetaSummer.setCombinerClass(VectorSumReducer.class);
     thetaSummer.getConfiguration().setFloat(ThetaMapper.ALPHA_I, alphaI);
     thetaSummer.getConfiguration().setBoolean(ThetaMapper.TRAIN_COMPLEMENTARY, trainComplementary);
@@ -147,8 +170,11 @@ public final class TrainNaiveBayesJob ex
       Iterable<String> labels = Splitter.on(",").split(getOption(LABELS));
       labelSize = BayesUtils.writeLabelIndex(getConf(), labels, labPath);
     } else if (hasOption(EXTRACT_LABELS)) {
-      SequenceFileDirIterable<Text, IntWritable> iterable =
-              new SequenceFileDirIterable<Text, IntWritable>(getInputPath(), PathType.LIST, PathFilters.logsCRCFilter(), getConf());
+      Iterable<Pair<Text,IntWritable>> iterable =
+          new SequenceFileDirIterable<Text, IntWritable>(getInputPath(),
+                                                         PathType.LIST,
+                                                         PathFilters.logsCRCFilter(),
+                                                         getConf());
       labelSize = BayesUtils.writeLabelIndex(getConf(), labPath, iterable);
     }
     return labelSize;

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/BaumWelchTrainer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/BaumWelchTrainer.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/BaumWelchTrainer.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/BaumWelchTrainer.java Wed Jun 20 12:07:50 2012
@@ -98,7 +98,7 @@ public final class BaumWelchTrainer {
       List<Integer> observations = new ArrayList<Integer>();
 
       //reading observations
-      Scanner scanner = new Scanner(new FileInputStream(input));
+      Scanner scanner = new Scanner(new FileInputStream(input), "UTF-8");
       try {
         while (scanner.hasNextInt()) {
           observations.add(scanner.nextInt());

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/RandomSequenceGenerator.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/RandomSequenceGenerator.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/RandomSequenceGenerator.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/RandomSequenceGenerator.java Wed Jun 20 12:07:50 2012
@@ -18,6 +18,7 @@
 
 package org.apache.mahout.classifier.sequencelearning.hmm;
 
+import com.google.common.base.Charsets;
 import com.google.common.io.Closeables;
 import org.apache.commons.cli2.CommandLine;
 import org.apache.commons.cli2.Group;
@@ -33,6 +34,7 @@ import java.io.DataInputStream;
 import java.io.FileInputStream;
 import java.io.FileOutputStream;
 import java.io.IOException;
+import java.io.OutputStreamWriter;
 import java.io.PrintWriter;
 
 /**
@@ -90,7 +92,7 @@ public final class RandomSequenceGenerat
       int[] observations = HmmEvaluator.predict(model, length, System.currentTimeMillis());
 
       //writing output
-      PrintWriter writer = new PrintWriter(new FileOutputStream(output), true);
+      PrintWriter writer = new PrintWriter(new OutputStreamWriter(new FileOutputStream(output), Charsets.UTF_8), true);
       try {
         for (int observation : observations) {
           writer.print(observation);

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/ViterbiEvaluator.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/ViterbiEvaluator.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/ViterbiEvaluator.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/ViterbiEvaluator.java Wed Jun 20 12:07:50 2012
@@ -17,6 +17,7 @@
 
 package org.apache.mahout.classifier.sequencelearning.hmm;
 
+import com.google.common.base.Charsets;
 import com.google.common.io.Closeables;
 import org.apache.commons.cli2.CommandLine;
 import org.apache.commons.cli2.Group;
@@ -33,6 +34,7 @@ import java.io.DataInputStream;
 import java.io.FileInputStream;
 import java.io.FileOutputStream;
 import java.io.IOException;
+import java.io.OutputStreamWriter;
 import java.io.PrintWriter;
 import java.util.ArrayList;
 import java.util.List;
@@ -90,7 +92,7 @@ public final class ViterbiEvaluator {
 
       //reading observations
       List<Integer> observations = new ArrayList<Integer>();
-      Scanner scanner = new Scanner(new FileInputStream(input));
+      Scanner scanner = new Scanner(new FileInputStream(input), "UTF-8");
       try {
         while (scanner.hasNextInt()) {
           observations.add(scanner.nextInt());
@@ -108,7 +110,7 @@ public final class ViterbiEvaluator {
       int[] hiddenStates = HmmEvaluator.decode(model, observationsArray, true);
 
       //writing output
-      PrintWriter writer = new PrintWriter(new FileOutputStream(output), true);
+      PrintWriter writer = new PrintWriter(new OutputStreamWriter(new FileOutputStream(output), Charsets.UTF_8), true);
       try {
         for (int hiddenState : hiddenStates) {
           writer.print(hiddenState);

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sgd/AbstractOnlineLogisticRegression.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sgd/AbstractOnlineLogisticRegression.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sgd/AbstractOnlineLogisticRegression.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sgd/AbstractOnlineLogisticRegression.java Wed Jun 20 12:07:50 2012
@@ -82,7 +82,7 @@ public abstract class AbstractOnlineLogi
    *          of v is disturbed.
    * @return A version of v with the link function applied.
    */
-  public Vector link(Vector v) {
+  public static Vector link(Vector v) {
     double max = v.maxValue();
     if (max >= 40) {
       // if max > 40, we subtract the large offset first
@@ -101,7 +101,7 @@ public abstract class AbstractOnlineLogi
    * @param r The value to transform.
    * @return The logit of r.
    */
-  public double link(double r) {
+  public static double link(double r) {
     if (r < 0.0) {
       double s = Math.exp(r);
       return s / (1.0 + s);

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sgd/AdaptiveLogisticRegression.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sgd/AdaptiveLogisticRegression.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sgd/AdaptiveLogisticRegression.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sgd/AdaptiveLogisticRegression.java Wed Jun 20 12:07:50 2012
@@ -116,7 +116,7 @@ public class AdaptiveLogisticRegression 
     Wrapper w = new Wrapper(numCategories, numFeatures, prior);
     seed.setPayload(w);
 
-    w.setMappings(seed);
+    Wrapper.setMappings(seed);
     seed.setPayload(w);
     setPoolSize(this.poolSize);
   }
@@ -180,7 +180,7 @@ public class AdaptiveLogisticRegression 
         // mutation rates small and also hack their learning rate to be small
         // as well.
         for (State<Wrapper, CrossFoldLearner> state : ep.getPopulation().subList(0, SURVIVORS)) {
-          state.getPayload().freeze(state);
+          Wrapper.freeze(state);
         }
       }
     }
@@ -411,7 +411,7 @@ public class AdaptiveLogisticRegression 
       wrapped.decayExponent(0);
     }
 
-    public void freeze(State<Wrapper, CrossFoldLearner> s) {
+    public static void freeze(State<Wrapper, CrossFoldLearner> s) {
       // radically decrease learning rate
       s.getParams()[1] -= 10;
 
@@ -423,7 +423,7 @@ public class AdaptiveLogisticRegression 
       }
     }
 
-    public void setMappings(State<Wrapper, CrossFoldLearner> x) {
+    public static void setMappings(State<Wrapper, CrossFoldLearner> x) {
       int i = 0;
       // set the range for regularization (lambda)
       x.setMap(i++, Mapping.logLimit(1.0e-8, 0.1));

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/AbstractCluster.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/AbstractCluster.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/AbstractCluster.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/AbstractCluster.java Wed Jun 20 12:07:50 2012
@@ -130,6 +130,7 @@ public abstract class AbstractCluster im
     this.id = id;
   }
   
+  @Override
   public long getNumObservations() {
     return numObservations;
   }
@@ -142,6 +143,7 @@ public abstract class AbstractCluster im
     this.numObservations = l;
   }
   
+  @Override
   public long getTotalObservations() {
     return totalObservations;
   }

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyDriver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyDriver.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyDriver.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyDriver.java Wed Jun 20 12:07:50 2012
@@ -17,8 +17,6 @@
 
 package org.apache.mahout.clustering.canopy;
 
-import static org.apache.mahout.clustering.topdown.PathDirectory.CLUSTERED_POINTS_DIRECTORY;
-
 import java.io.IOException;
 import java.util.Collection;
 
@@ -39,6 +37,7 @@ import org.apache.mahout.clustering.clas
 import org.apache.mahout.clustering.classify.ClusterClassifier;
 import org.apache.mahout.clustering.iterator.CanopyClusteringPolicy;
 import org.apache.mahout.clustering.iterator.ClusterWritable;
+import org.apache.mahout.clustering.topdown.PathDirectory;
 import org.apache.mahout.common.AbstractJob;
 import org.apache.mahout.common.ClassUtils;
 import org.apache.mahout.common.HadoopUtil;
@@ -155,13 +154,12 @@ public class CanopyDriver extends Abstra
     Path clustersOut = buildClusters(conf, input, output, measure, t1, t2, t3,
         t4, clusterFilter, runSequential);
     if (runClustering) {
-      clusterData(conf, input, clustersOut, output, clusterClassificationThreshold, runSequential);
+      clusterData(input, clustersOut, output, clusterClassificationThreshold, runSequential);
     }
   }
 
   /**
    * Convenience method to provide backward compatibility
-   * @param clusterClassificationThreshold TODO
    */
   public static void run(Configuration conf, Path input, Path output,
       DistanceMeasure measure, double t1, double t2, boolean runClustering,
@@ -365,12 +363,17 @@ public class CanopyDriver extends Abstra
     return canopyOutputDir;
   }
 
-  private static void clusterData(Configuration conf, Path points, Path canopies, Path output,
-      double clusterClassificationThreshold, boolean runSequential) throws IOException, InterruptedException,
-      ClassNotFoundException {
+  private static void clusterData(Path points,
+                                  Path canopies,
+                                  Path output,
+                                  double clusterClassificationThreshold,
+                                  boolean runSequential)
+      throws IOException, InterruptedException, ClassNotFoundException {
     ClusterClassifier.writePolicy(new CanopyClusteringPolicy(), canopies);
-    ClusterClassificationDriver.run(points, output, new Path(output, CLUSTERED_POINTS_DIRECTORY),
-        clusterClassificationThreshold, true, runSequential);
+    ClusterClassificationDriver.run(points,
+                                    output,
+                                    new Path(output, PathDirectory.CLUSTERED_POINTS_DIRECTORY),
+                                    clusterClassificationThreshold, true, runSequential);
   }
   
 }

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/classify/ClusterClassificationConfigKeys.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/classify/ClusterClassificationConfigKeys.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/classify/ClusterClassificationConfigKeys.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/classify/ClusterClassificationConfigKeys.java Wed Jun 20 12:07:50 2012
@@ -20,12 +20,14 @@ package org.apache.mahout.clustering.cla
 /**
  * Constants used in Cluster Classification.
  */
-public class ClusterClassificationConfigKeys {
+public final class ClusterClassificationConfigKeys {
   
   public static final String CLUSTERS_IN = "clusters_in";
   
   public static final String OUTLIER_REMOVAL_THRESHOLD = "pdf_threshold";
   
   public static final String EMIT_MOST_LIKELY = "emit_most_likely";
-  
+
+  private ClusterClassificationConfigKeys() {
+  }
 }

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/classify/ClusterClassificationDriver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/classify/ClusterClassificationDriver.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/classify/ClusterClassificationDriver.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/classify/ClusterClassificationDriver.java Wed Jun 20 12:07:50 2012
@@ -17,10 +17,6 @@
 
 package org.apache.mahout.clustering.classify;
 
-import static org.apache.mahout.clustering.classify.ClusterClassificationConfigKeys.CLUSTERS_IN;
-import static org.apache.mahout.clustering.classify.ClusterClassificationConfigKeys.EMIT_MOST_LIKELY;
-import static org.apache.mahout.clustering.classify.ClusterClassificationConfigKeys.OUTLIER_REMOVAL_THRESHOLD;
-
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Iterator;
@@ -56,7 +52,7 @@ import org.apache.mahout.math.VectorWrit
  * Classifies the vectors into different clusters found by the clustering
  * algorithm.
  */
-public class ClusterClassificationDriver extends AbstractJob {
+public final class ClusterClassificationDriver extends AbstractJob {
   
   /**
    * CLI to run Cluster Classification Driver.
@@ -98,7 +94,8 @@ public class ClusterClassificationDriver
   /**
    * Constructor to be used by the ToolRunner.
    */
-  private ClusterClassificationDriver() {}
+  private ClusterClassificationDriver() {
+  }
   
   public static void main(String[] args) throws Exception {
     ToolRunner.run(new Configuration(), new ClusterClassificationDriver(), args);
@@ -158,13 +155,12 @@ public class ClusterClassificationDriver
    */
   private static List<Cluster> populateClusterModels(Path clusterOutputPath, Configuration conf) throws IOException {
     List<Cluster> clusterModels = new ArrayList<Cluster>();
-    Cluster cluster = null;
     Path finalClustersPath = finalClustersPath(conf, clusterOutputPath);
     Iterator<?> it = new SequenceFileDirValueIterator<Writable>(finalClustersPath, PathType.LIST,
         PathFilters.partFilter(), null, false, conf);
     while (it.hasNext()) {
       ClusterWritable next = (ClusterWritable) it.next();
-      cluster = (Cluster) next.getValue();
+      Cluster cluster = next.getValue();
       cluster.configure(conf);
       clusterModels.add(cluster);
     }
@@ -174,8 +170,7 @@ public class ClusterClassificationDriver
   private static Path finalClustersPath(Configuration conf, Path clusterOutputPath) throws IOException {
     FileSystem fileSystem = clusterOutputPath.getFileSystem(conf);
     FileStatus[] clusterFiles = fileSystem.listStatus(clusterOutputPath, PathFilters.finalPartFilter());
-    Path finalClustersPath = clusterFiles[0].getPath();
-    return finalClustersPath;
+    return clusterFiles[0].getPath();
   }
   
   /**
@@ -246,17 +241,17 @@ public class ClusterClassificationDriver
    * @return whether the vector should be classified or not.
    */
   private static boolean shouldClassify(Vector pdfPerCluster, Double clusterClassificationThreshold) {
-    boolean isMaxPDFGreatherThanThreshold = pdfPerCluster.maxValue() >= clusterClassificationThreshold;
-    return isMaxPDFGreatherThanThreshold;
+    return pdfPerCluster.maxValue() >= clusterClassificationThreshold;
   }
   
   private static void classifyClusterMR(Configuration conf, Path input, Path clustersIn, Path output,
       Double clusterClassificationThreshold, boolean emitMostLikely) throws IOException, InterruptedException,
       ClassNotFoundException {
     
-    conf.setFloat(OUTLIER_REMOVAL_THRESHOLD, clusterClassificationThreshold.floatValue());
-    conf.setBoolean(EMIT_MOST_LIKELY, emitMostLikely);
-    conf.set(CLUSTERS_IN, clustersIn.toUri().toString());
+    conf.setFloat(ClusterClassificationConfigKeys.OUTLIER_REMOVAL_THRESHOLD,
+                  clusterClassificationThreshold.floatValue());
+    conf.setBoolean(ClusterClassificationConfigKeys.EMIT_MOST_LIKELY, emitMostLikely);
+    conf.set(ClusterClassificationConfigKeys.CLUSTERS_IN, clustersIn.toUri().toString());
     
     Job job = new Job(conf, "Cluster Classification Driver running over input: " + input);
     job.setJarByClass(ClusterClassificationDriver.class);

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/classify/ClusterClassificationMapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/classify/ClusterClassificationMapper.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/classify/ClusterClassificationMapper.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/classify/ClusterClassificationMapper.java Wed Jun 20 12:07:50 2012
@@ -17,10 +17,6 @@
 
 package org.apache.mahout.clustering.classify;
 
-import static org.apache.mahout.clustering.classify.ClusterClassificationConfigKeys.CLUSTERS_IN;
-import static org.apache.mahout.clustering.classify.ClusterClassificationConfigKeys.EMIT_MOST_LIKELY;
-import static org.apache.mahout.clustering.classify.ClusterClassificationConfigKeys.OUTLIER_REMOVAL_THRESHOLD;
-
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Iterator;
@@ -50,7 +46,7 @@ import org.apache.mahout.math.VectorWrit
 public class ClusterClassificationMapper extends
     Mapper<WritableComparable<?>,VectorWritable,IntWritable,WeightedVectorWritable> {
   
-  private static double threshold;
+  private double threshold;
   private List<Cluster> clusterModels;
   private ClusterClassifier clusterClassifier;
   private IntWritable clusterId;
@@ -58,14 +54,13 @@ public class ClusterClassificationMapper
   private boolean emitMostLikely;
   
   @Override
-  protected void setup(Context context) throws IOException,
-      InterruptedException {
+  protected void setup(Context context) throws IOException, InterruptedException {
     super.setup(context);
     
     Configuration conf = context.getConfiguration();
-    String clustersIn = conf.get(CLUSTERS_IN);
-    threshold = conf.getFloat(OUTLIER_REMOVAL_THRESHOLD, 0.0f);
-    emitMostLikely = conf.getBoolean(EMIT_MOST_LIKELY, false);
+    String clustersIn = conf.get(ClusterClassificationConfigKeys.CLUSTERS_IN);
+    threshold = conf.getFloat(ClusterClassificationConfigKeys.OUTLIER_REMOVAL_THRESHOLD, 0.0f);
+    emitMostLikely = conf.getBoolean(ClusterClassificationConfigKeys.EMIT_MOST_LIKELY, false);
     
     clusterModels = new ArrayList<Cluster>();
     
@@ -119,37 +114,29 @@ public class ClusterClassificationMapper
     context.write(clusterId, weightedVW);
   }
   
-  public static List<Cluster> populateClusterModels(Path clusterOutputPath, Configuration conf)
-      throws IOException {
+  public static List<Cluster> populateClusterModels(Path clusterOutputPath, Configuration conf) throws IOException {
     List<Cluster> clusters = new ArrayList<Cluster>();
-    Cluster cluster = null;
     FileSystem fileSystem = clusterOutputPath.getFileSystem(conf);
-    FileStatus[] clusterFiles = fileSystem.listStatus(clusterOutputPath,
-        PathFilters.finalPartFilter());
+    FileStatus[] clusterFiles = fileSystem.listStatus(clusterOutputPath, PathFilters.finalPartFilter());
     Iterator<?> it = new SequenceFileDirValueIterator<Writable>(
         clusterFiles[0].getPath(), PathType.LIST, PathFilters.partFilter(),
         null, false, conf);
     while (it.hasNext()) {
       ClusterWritable next = (ClusterWritable) it.next();
-      cluster = next.getValue();
+      Cluster cluster = next.getValue();
       cluster.configure(conf);
       clusters.add(cluster);
     }
     return clusters;
   }
   
-  private static boolean shouldClassify(Vector pdfPerCluster) {
-    boolean isMaxPDFGreatherThanThreshold = pdfPerCluster.maxValue() >= threshold;
-    return isMaxPDFGreatherThanThreshold;
+  private boolean shouldClassify(Vector pdfPerCluster) {
+    return pdfPerCluster.maxValue() >= threshold;
   }
   
-  private static Path finalClustersPath(Path clusterOutputPath)
-      throws IOException {
-    FileSystem fileSystem = clusterOutputPath
-        .getFileSystem(new Configuration());
-    FileStatus[] clusterFiles = fileSystem.listStatus(clusterOutputPath,
-        PathFilters.finalPartFilter());
-    Path finalClustersPath = clusterFiles[0].getPath();
-    return finalClustersPath;
+  private static Path finalClustersPath(Path clusterOutputPath) throws IOException {
+    FileSystem fileSystem = clusterOutputPath.getFileSystem(new Configuration());
+    FileStatus[] clusterFiles = fileSystem.listStatus(clusterOutputPath, PathFilters.finalPartFilter());
+    return clusterFiles[0].getPath();
   }
 }

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/classify/WeightedPropertyVectorWritable.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/classify/WeightedPropertyVectorWritable.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/classify/WeightedPropertyVectorWritable.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/classify/WeightedPropertyVectorWritable.java Wed Jun 20 12:07:50 2012
@@ -80,7 +80,7 @@ public class WeightedPropertyVectorWrita
   @Override
   public String toString() {
     Vector vector = getVector();
-    StringBuilder bldr = new StringBuilder("wt: ").append(getWeight()).append(" ");
+    StringBuilder bldr = new StringBuilder("wt: ").append(getWeight()).append(' ');
     if (properties != null && !properties.isEmpty()) {
       for (Map.Entry<Text, Text> entry : properties.entrySet()) {
         bldr.append(entry.getKey().toString()).append(": ").append(entry.getValue().toString()).append(' ');

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletDriver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletDriver.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletDriver.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletDriver.java Wed Jun 20 12:07:50 2012
@@ -17,8 +17,6 @@
 
 package org.apache.mahout.clustering.dirichlet;
 
-import static org.apache.mahout.clustering.topdown.PathDirectory.CLUSTERED_POINTS_DIRECTORY;
-
 import java.io.IOException;
 import java.util.List;
 
@@ -36,6 +34,7 @@ import org.apache.mahout.clustering.diri
 import org.apache.mahout.clustering.dirichlet.models.GaussianClusterDistribution;
 import org.apache.mahout.clustering.iterator.ClusterIterator;
 import org.apache.mahout.clustering.iterator.DirichletClusteringPolicy;
+import org.apache.mahout.clustering.topdown.PathDirectory;
 import org.apache.mahout.common.AbstractJob;
 import org.apache.mahout.common.HadoopUtil;
 import org.apache.mahout.common.commandline.DefaultOptionCreator;
@@ -200,9 +199,9 @@ public class DirichletDriver extends Abs
     prior.writeToSeqFiles(clustersIn);
     
     if (runSequential) {
-      new ClusterIterator().iterateSeq(conf, input, clustersIn, output, maxIterations);
+      ClusterIterator.iterateSeq(conf, input, clustersIn, output, maxIterations);
     } else {
-      new ClusterIterator().iterateMR(conf, input, clustersIn, output, maxIterations);
+      ClusterIterator.iterateMR(conf, input, clustersIn, output, maxIterations);
     }
     return output;
     
@@ -218,10 +217,6 @@ public class DirichletDriver extends Abs
    *          the directory pathname for input state
    * @param output
    *          the directory pathname for output points
-   * @param alpha0
-   *          TODO
-   * @param numModels
-   *          TODO
    * @param emitMostLikely
    *          a boolean if true emit only most likely cluster for each point
    * @param threshold
@@ -233,7 +228,7 @@ public class DirichletDriver extends Abs
       int numModels, boolean emitMostLikely, double threshold, boolean runSequential) throws IOException,
       InterruptedException, ClassNotFoundException {
     ClusterClassifier.writePolicy(new DirichletClusteringPolicy(numModels, alpha0), stateIn);
-    ClusterClassificationDriver.run(conf, input, output, new Path(output, CLUSTERED_POINTS_DIRECTORY), threshold,
+    ClusterClassificationDriver.run(conf, input, output, new Path(output, PathDirectory.CLUSTERED_POINTS_DIRECTORY), threshold,
         emitMostLikely, runSequential);
   }
   

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/DistributionDescription.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/DistributionDescription.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/DistributionDescription.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/DistributionDescription.java Wed Jun 20 12:07:50 2012
@@ -17,8 +17,6 @@
 
 package org.apache.mahout.clustering.dirichlet.models;
 
-import java.lang.reflect.Constructor;
-import java.lang.reflect.InvocationTargetException;
 import java.util.Iterator;
 
 import org.apache.hadoop.conf.Configuration;
@@ -40,7 +38,10 @@ public final class DistributionDescripti
   private final String distanceMeasure;
   private final int prototypeSize;
   
-  public DistributionDescription(String modelFactory, String modelPrototype, String distanceMeasure, int prototypeSize) {
+  public DistributionDescription(String modelFactory,
+                                 String modelPrototype,
+                                 String distanceMeasure,
+                                 int prototypeSize) {
     this.modelFactory = modelFactory;
     this.modelPrototype = modelPrototype;
     this.distanceMeasure = distanceMeasure;
@@ -65,36 +66,24 @@ public final class DistributionDescripti
   
   /**
    * Create an instance of AbstractVectorModelDistribution from the given command line arguments
-   * 
-   * @param conf
-   *          the Configuration
    */
   public ModelDistribution<VectorWritable> createModelDistribution(Configuration conf) {
-    ClassLoader ccl = Thread.currentThread().getContextClassLoader();
-    AbstractVectorModelDistribution modelDistribution;
-    try {
-      modelDistribution = ClassUtils.instantiateAs(modelFactory, AbstractVectorModelDistribution.class);
-      
-      Class<? extends Vector> vcl = ccl.loadClass(modelPrototype).asSubclass(Vector.class);
-      Constructor<? extends Vector> v = vcl.getConstructor(int.class);
-      modelDistribution.setModelPrototype(new VectorWritable(v.newInstance(prototypeSize)));
+    AbstractVectorModelDistribution modelDistribution =
+        ClassUtils.instantiateAs(modelFactory, AbstractVectorModelDistribution.class);
+
+    Vector prototype = ClassUtils.instantiateAs(modelPrototype,
+                                                Vector.class,
+                                                new Class<?>[] {int.class},
+                                                new Object[] {prototypeSize});
       
-      if (modelDistribution instanceof DistanceMeasureClusterDistribution) {
-        DistanceMeasure measure = ClassUtils.instantiateAs(distanceMeasure, DistanceMeasure.class);
-        measure.configure(conf);
-        ((DistanceMeasureClusterDistribution) modelDistribution).setMeasure(measure);
-      }
-    } catch (ClassNotFoundException cnfe) {
-      throw new IllegalStateException(cnfe);
-    } catch (NoSuchMethodException nsme) {
-      throw new IllegalStateException(nsme);
-    } catch (InstantiationException ie) {
-      throw new IllegalStateException(ie);
-    } catch (IllegalAccessException iae) {
-      throw new IllegalStateException(iae);
-    } catch (InvocationTargetException ite) {
-      throw new IllegalStateException(ite);
+    modelDistribution.setModelPrototype(new VectorWritable(prototype));
+
+    if (modelDistribution instanceof DistanceMeasureClusterDistribution) {
+      DistanceMeasure measure = ClassUtils.instantiateAs(distanceMeasure, DistanceMeasure.class);
+      measure.configure(conf);
+      ((DistanceMeasureClusterDistribution) modelDistribution).setMeasure(measure);
     }
+
     return modelDistribution;
   }
   

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/GaussianCluster.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/GaussianCluster.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/GaussianCluster.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/GaussianCluster.java Wed Jun 20 12:07:50 2012
@@ -47,10 +47,7 @@ public class GaussianCluster extends Abs
   public Model<VectorWritable> sampleFromPosterior() {
     return new GaussianCluster(getCenter(), getRadius(), getId());
   }
-  
-  /* (non-Javadoc)
-   * @see org.apache.mahout.clustering.AbstractCluster#setRadius(org.apache.mahout.math.Vector)
-   */
+
   @Override
   protected void setRadius(Vector s2) {
     super.setRadius(s2);

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansDriver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansDriver.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansDriver.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansDriver.java Wed Jun 20 12:07:50 2012
@@ -17,8 +17,6 @@
 
 package org.apache.mahout.clustering.fuzzykmeans;
 
-import static org.apache.mahout.clustering.topdown.PathDirectory.CLUSTERED_POINTS_DIRECTORY;
-
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.List;
@@ -33,6 +31,7 @@ import org.apache.mahout.clustering.iter
 import org.apache.mahout.clustering.iterator.ClusteringPolicy;
 import org.apache.mahout.clustering.iterator.FuzzyKMeansClusteringPolicy;
 import org.apache.mahout.clustering.kmeans.RandomSeedGenerator;
+import org.apache.mahout.clustering.topdown.PathDirectory;
 import org.apache.mahout.common.AbstractJob;
 import org.apache.mahout.common.ClassUtils;
 import org.apache.mahout.common.HadoopUtil;
@@ -283,9 +282,9 @@ public class FuzzyKMeansDriver extends A
     prior.writeToSeqFiles(priorClustersPath);
     
     if (runSequential) {
-      new ClusterIterator().iterateSeq(conf, input, priorClustersPath, output, maxIterations);
+      ClusterIterator.iterateSeq(conf, input, priorClustersPath, output, maxIterations);
     } else {
-      new ClusterIterator().iterateMR(conf, input, priorClustersPath, output, maxIterations);
+      ClusterIterator.iterateMR(conf, input, priorClustersPath, output, maxIterations);
     }
     return output;
   }
@@ -321,7 +320,7 @@ public class FuzzyKMeansDriver extends A
     throws IOException, ClassNotFoundException, InterruptedException {
     
     ClusterClassifier.writePolicy(new FuzzyKMeansClusteringPolicy(m, convergenceDelta), clustersIn);
-    ClusterClassificationDriver.run(input, output, new Path(output, CLUSTERED_POINTS_DIRECTORY), threshold, true,
+    ClusterClassificationDriver.run(input, output, new Path(output, PathDirectory.CLUSTERED_POINTS_DIRECTORY), threshold, true,
         runSequential);
   }
 }

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/iterator/CIMapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/iterator/CIMapper.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/iterator/CIMapper.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/iterator/CIMapper.java Wed Jun 20 12:07:50 2012
@@ -18,16 +18,8 @@ import org.apache.mahout.math.VectorWrit
 public class CIMapper extends Mapper<WritableComparable<?>,VectorWritable,IntWritable,ClusterWritable> {
   
   private ClusterClassifier classifier;
-  
   private ClusteringPolicy policy;
-  
-  /*
-   * (non-Javadoc)
-   * 
-   * @see
-   * org.apache.hadoop.mapreduce.Mapper#setup(org.apache.hadoop.mapreduce.Mapper
-   * .Context)
-   */
+
   @Override
   protected void setup(Context context) throws IOException, InterruptedException {
     Configuration conf = context.getConfiguration();
@@ -38,13 +30,7 @@ public class CIMapper extends Mapper<Wri
     policy.update(classifier);
     super.setup(context);
   }
-  
-  /*
-   * (non-Javadoc)
-   * 
-   * @see org.apache.hadoop.mapreduce.Mapper#map(java.lang.Object,
-   * java.lang.Object, org.apache.hadoop.mapreduce.Mapper.Context)
-   */
+
   @Override
   protected void map(WritableComparable<?> key, VectorWritable value, Context context) throws IOException,
       InterruptedException {
@@ -55,14 +41,7 @@ public class CIMapper extends Mapper<Wri
       classifier.train(el.index(), value.get(), el.get());
     }
   }
-  
-  /*
-   * (non-Javadoc)
-   * 
-   * @see
-   * org.apache.hadoop.mapreduce.Mapper#cleanup(org.apache.hadoop.mapreduce.
-   * Mapper.Context)
-   */
+
   @Override
   protected void cleanup(Context context) throws IOException, InterruptedException {
     List<Cluster> clusters = classifier.getModels();

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/iterator/CIReducer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/iterator/CIReducer.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/iterator/CIReducer.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/iterator/CIReducer.java Wed Jun 20 12:07:50 2012
@@ -53,14 +53,7 @@ public class CIReducer extends Reducer<I
     classifier.close();
     context.write(key, first);
   }
-  
-  /*
-   * (non-Javadoc)
-   * 
-   * @see
-   * org.apache.hadoop.mapreduce.Mapper#setup(org.apache.hadoop.mapreduce.Mapper
-   * .Context)
-   */
+
   @Override
   protected void setup(Context context) throws IOException, InterruptedException {
     Configuration conf = context.getConfiguration();

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/iterator/CanopyClusteringPolicy.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/iterator/CanopyClusteringPolicy.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/iterator/CanopyClusteringPolicy.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/iterator/CanopyClusteringPolicy.java Wed Jun 20 12:07:50 2012
@@ -29,20 +29,10 @@ import org.apache.mahout.math.Vector;
  * 
  */
 public class CanopyClusteringPolicy extends AbstractClusteringPolicy {
-  
-  public CanopyClusteringPolicy() {
-    super();
-  }
-  
-  private double t1, t2;
-  
-  /*
-   * (non-Javadoc)
-   * 
-   * @see
-   * org.apache.mahout.clustering.ClusteringPolicy#select(org.apache.mahout.
-   * math.Vector)
-   */
+
+  private double t1;
+  private double t2;
+
   @Override
   public Vector select(Vector probabilities) {
     int maxValueIndex = probabilities.maxValueIndex();
@@ -50,23 +40,13 @@ public class CanopyClusteringPolicy exte
     weights.set(maxValueIndex, 1.0);
     return weights;
   }
-  
-  /*
-   * (non-Javadoc)
-   * 
-   * @see org.apache.hadoop.io.Writable#write(java.io.DataOutput)
-   */
+
   @Override
   public void write(DataOutput out) throws IOException {
     out.writeDouble(t1);
     out.writeDouble(t2);
   }
-  
-  /*
-   * (non-Javadoc)
-   * 
-   * @see org.apache.hadoop.io.Writable#readFields(java.io.DataInput)
-   */
+
   @Override
   public void readFields(DataInput in) throws IOException {
     this.t1 = in.readDouble();

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/iterator/ClusterIterator.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/iterator/ClusterIterator.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/iterator/ClusterIterator.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/iterator/ClusterIterator.java Wed Jun 20 12:07:50 2012
@@ -46,15 +46,16 @@ import com.google.common.io.Closeables;
  * algorithm (currently k-means, fuzzy-k-means and Dirichlet) that processes all the input vectors in each iteration.
  * The cluster classifier is configured with a ClusteringPolicy to select the desired clustering algorithm.
  */
-public class ClusterIterator {
+public final class ClusterIterator {
   
   public static final String PRIOR_PATH_KEY = "org.apache.mahout.clustering.prior.path";
+
+  private ClusterIterator() {
+  }
   
   /**
    * Iterate over data using a prior-trained ClusterClassifier, for a number of iterations
-   * 
-   * @param policy
-   *          the ClusteringPolicy to use
+   *
    * @param data
    *          a {@code List<Vector>} of input vectors
    * @param classifier
@@ -64,7 +65,7 @@ public class ClusterIterator {
    * 
    * @return the posterior ClusterClassifier
    */
-  public ClusterClassifier iterate(Iterable<Vector> data, ClusterClassifier classifier, int numIterations) {
+  public static ClusterClassifier iterate(Iterable<Vector> data, ClusterClassifier classifier, int numIterations) {
     ClusteringPolicy policy = classifier.getPolicy();
     for (int iteration = 1; iteration <= numIterations; iteration++) {
       for (Vector vector : data) {
@@ -100,11 +101,9 @@ public class ClusterIterator {
    *          a Path of output directory
    * @param numIterations
    *          the int number of iterations to perform
-   * 
-   * @throws IOException
    */
-  public void iterateSeq(Configuration conf, Path inPath, Path priorPath, Path outPath, int numIterations)
-      throws IOException {
+  public static void iterateSeq(Configuration conf, Path inPath, Path priorPath, Path outPath, int numIterations)
+    throws IOException {
     ClusterClassifier classifier = new ClusterClassifier();
     classifier.readFromSeqFiles(conf, priorPath);
     Path clustersOut = null;
@@ -155,8 +154,8 @@ public class ClusterIterator {
    * @param numIterations
    *          the int number of iterations to perform
    */
-  public void iterateMR(Configuration conf, Path inPath, Path priorPath, Path outPath, int numIterations)
-      throws IOException, InterruptedException, ClassNotFoundException {
+  public static void iterateMR(Configuration conf, Path inPath, Path priorPath, Path outPath, int numIterations)
+    throws IOException, InterruptedException, ClassNotFoundException {
     ClusteringPolicy policy = ClusterClassifier.readPolicy(priorPath);
     Path clustersOut = null;
     int iteration = 1;
@@ -164,7 +163,6 @@ public class ClusterIterator {
       conf.set(PRIOR_PATH_KEY, priorPath.toString());
       
       String jobName = "Cluster Iterator running iteration " + iteration + " over priorPath: " + priorPath;
-      System.out.println(jobName);
       Job job = new Job(conf, jobName);
       job.setMapOutputKeyClass(IntWritable.class);
       job.setMapOutputValueClass(ClusterWritable.class);
@@ -205,7 +203,7 @@ public class ClusterIterator {
    * @throws IOException
    *           if there was an IO error
    */
-  private boolean isConverged(Path filePath, Configuration conf, FileSystem fs) throws IOException {
+  private static boolean isConverged(Path filePath, Configuration conf, FileSystem fs) throws IOException {
     for (FileStatus part : fs.listStatus(filePath, PathFilters.partFilter())) {
       SequenceFileValueIterator<ClusterWritable> iterator = new SequenceFileValueIterator<ClusterWritable>(
           part.getPath(), true, conf);

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/iterator/ClusteringPolicy.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/iterator/ClusteringPolicy.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/iterator/ClusteringPolicy.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/iterator/ClusteringPolicy.java Wed Jun 20 12:07:50 2012
@@ -36,7 +36,7 @@ public interface ClusteringPolicy extend
    * @return a Vector of probabilities that the data is described by each of the
    *         models
    */
-  public Vector classify(Vector data, ClusterClassifier prior);
+  Vector classify(Vector data, ClusterClassifier prior);
   
   /**
    * Return a vector of weights for each of the models given those probabilities
@@ -45,7 +45,7 @@ public interface ClusteringPolicy extend
    *          a Vector of pdfs
    * @return a Vector of weights
    */
-  public Vector select(Vector probabilities);
+  Vector select(Vector probabilities);
   
   /**
    * Update the policy with the given classifier
@@ -53,7 +53,7 @@ public interface ClusteringPolicy extend
    * @param posterior
    *          a ClusterClassifier
    */
-  public void update(ClusterClassifier posterior);
+  void update(ClusterClassifier posterior);
   
   /**
    * Close the policy using the classifier's models
@@ -61,6 +61,6 @@ public interface ClusteringPolicy extend
    * @param posterior
    *          a posterior ClusterClassifier
    */
-  public void close(ClusterClassifier posterior);
+  void close(ClusterClassifier posterior);
   
 }
\ No newline at end of file

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/iterator/DirichletClusteringPolicy.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/iterator/DirichletClusteringPolicy.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/iterator/DirichletClusteringPolicy.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/iterator/DirichletClusteringPolicy.java Wed Jun 20 12:07:50 2012
@@ -30,7 +30,6 @@ import org.apache.mahout.math.VectorWrit
 public class DirichletClusteringPolicy extends AbstractClusteringPolicy {
   
   public DirichletClusteringPolicy() {
-    super();
   }
   
   /**
@@ -51,14 +50,7 @@ public class DirichletClusteringPolicy e
   
   // Alpha_0 primes the Dirichlet distribution
   private double alpha0;
-  
-  /*
-   * (non-Javadoc)
-   * 
-   * @see
-   * org.apache.mahout.clustering.ClusteringPolicy#select(org.apache.mahout.
-   * math.Vector)
-   */
+
   @Override
   public Vector select(Vector probabilities) {
     int rMultinom = UncommonDistributions.rMultinom(probabilities.times(mixture));
@@ -68,13 +60,7 @@ public class DirichletClusteringPolicy e
   }
   
   // update the total counts and then the mixture
-  /*
-   * (non-Javadoc)
-   * 
-   * @see
-   * org.apache.mahout.clustering.ClusteringPolicy#update(org.apache.mahout.
-   * clustering.ClusterClassifier)
-   */
+
   @Override
   public void update(ClusterClassifier prior) {
     Vector totalCounts = new DenseVector(prior.getModels().size());
@@ -83,23 +69,13 @@ public class DirichletClusteringPolicy e
     }
     mixture = UncommonDistributions.rDirichlet(totalCounts, alpha0);
   }
-  
-  /*
-   * (non-Javadoc)
-   * 
-   * @see org.apache.hadoop.io.Writable#write(java.io.DataOutput)
-   */
+
   @Override
   public void write(DataOutput out) throws IOException {
     out.writeDouble(alpha0);
     VectorWritable.writeVector(out, mixture);
   }
-  
-  /*
-   * (non-Javadoc)
-   * 
-   * @see org.apache.hadoop.io.Writable#readFields(java.io.DataInput)
-   */
+
   @Override
   public void readFields(DataInput in) throws IOException {
     this.alpha0 = in.readDouble();

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/iterator/FuzzyKMeansClusteringPolicy.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/iterator/FuzzyKMeansClusteringPolicy.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/iterator/FuzzyKMeansClusteringPolicy.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/iterator/FuzzyKMeansClusteringPolicy.java Wed Jun 20 12:07:50 2012
@@ -36,26 +36,18 @@ import com.google.common.collect.Lists;
  * 
  */
 public class FuzzyKMeansClusteringPolicy extends AbstractClusteringPolicy {
-  
-  public FuzzyKMeansClusteringPolicy() {
-    super();
-  }
-  
+
   private double m = 2;
-  
   private double convergenceDelta = 0.05;
-  
+
+  public FuzzyKMeansClusteringPolicy() {
+  }
+
   public FuzzyKMeansClusteringPolicy(double m, double convergenceDelta) {
     this.m = m;
+    this.convergenceDelta = convergenceDelta;
   }
-  
-  /*
-   * (non-Javadoc)
-   * 
-   * @see
-   * org.apache.mahout.clustering.ClusteringPolicy#select(org.apache.mahout.
-   * math.Vector)
-   */
+
   @Override
   public Vector select(Vector probabilities) {
     return probabilities;
@@ -74,23 +66,13 @@ public class FuzzyKMeansClusteringPolicy
     fuzzyKMeansClusterer.setM(m);
     return fuzzyKMeansClusterer.computePi(clusters, distances);
   }
-  
-  /*
-   * (non-Javadoc)
-   * 
-   * @see org.apache.hadoop.io.Writable#write(java.io.DataOutput)
-   */
+
   @Override
   public void write(DataOutput out) throws IOException {
     out.writeDouble(m);
     out.writeDouble(convergenceDelta);
   }
-  
-  /*
-   * (non-Javadoc)
-   * 
-   * @see org.apache.hadoop.io.Writable#readFields(java.io.DataInput)
-   */
+
   @Override
   public void readFields(DataInput in) throws IOException {
     this.m = in.readDouble();

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/iterator/KMeansClusteringPolicy.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/iterator/KMeansClusteringPolicy.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/iterator/KMeansClusteringPolicy.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/iterator/KMeansClusteringPolicy.java Wed Jun 20 12:07:50 2012
@@ -31,31 +31,19 @@ import org.apache.mahout.clustering.clas
 public class KMeansClusteringPolicy extends AbstractClusteringPolicy {
   
   public KMeansClusteringPolicy() {
-    super();
   }
   
   public KMeansClusteringPolicy(double convergenceDelta) {
-    super();
     this.convergenceDelta = convergenceDelta;
   }
   
   private double convergenceDelta = 0.001;
-  
-  /*
-   * (non-Javadoc)
-   * 
-   * @see org.apache.hadoop.io.Writable#write(java.io.DataOutput)
-   */
+
   @Override
   public void write(DataOutput out) throws IOException {
     out.writeDouble(convergenceDelta);
   }
-  
-  /*
-   * (non-Javadoc)
-   * 
-   * @see org.apache.hadoop.io.Writable#readFields(java.io.DataInput)
-   */
+
   @Override
   public void readFields(DataInput in) throws IOException {
     this.convergenceDelta = in.readDouble();

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/iterator/MeanShiftClusteringPolicy.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/iterator/MeanShiftClusteringPolicy.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/iterator/MeanShiftClusteringPolicy.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/iterator/MeanShiftClusteringPolicy.java Wed Jun 20 12:07:50 2012
@@ -27,17 +27,11 @@ import java.io.IOException;
  */
 public class MeanShiftClusteringPolicy extends AbstractClusteringPolicy {
   
-  public MeanShiftClusteringPolicy() {
-    super();
-  }
-  
-  private double t1, t2, t3, t4;
-  
-  /*
-   * (non-Javadoc)
-   * 
-   * @see org.apache.hadoop.io.Writable#write(java.io.DataOutput)
-   */
+  private double t1;
+  private double t2;
+  private double t3;
+  private double t4;
+
   @Override
   public void write(DataOutput out) throws IOException {
     out.writeDouble(t1);
@@ -45,12 +39,7 @@ public class MeanShiftClusteringPolicy e
     out.writeDouble(t3);
     out.writeDouble(t4);
   }
-  
-  /*
-   * (non-Javadoc)
-   * 
-   * @see org.apache.hadoop.io.Writable#readFields(java.io.DataInput)
-   */
+
   @Override
   public void readFields(DataInput in) throws IOException {
     this.t1 = in.readDouble();

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansDriver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansDriver.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansDriver.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansDriver.java Wed Jun 20 12:07:50 2012
@@ -16,8 +16,6 @@
  */
 package org.apache.mahout.clustering.kmeans;
 
-import static org.apache.mahout.clustering.topdown.PathDirectory.CLUSTERED_POINTS_DIRECTORY;
-
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.List;
@@ -31,6 +29,7 @@ import org.apache.mahout.clustering.clas
 import org.apache.mahout.clustering.iterator.ClusterIterator;
 import org.apache.mahout.clustering.iterator.ClusteringPolicy;
 import org.apache.mahout.clustering.iterator.KMeansClusteringPolicy;
+import org.apache.mahout.clustering.topdown.PathDirectory;
 import org.apache.mahout.common.AbstractJob;
 import org.apache.mahout.common.ClassUtils;
 import org.apache.mahout.common.HadoopUtil;
@@ -224,9 +223,9 @@ public class KMeansDriver extends Abstra
     prior.writeToSeqFiles(priorClustersPath);
     
     if (runSequential) {
-      new ClusterIterator().iterateSeq(conf, input, priorClustersPath, output, maxIterations);
+      ClusterIterator.iterateSeq(conf, input, priorClustersPath, output, maxIterations);
     } else {
-      new ClusterIterator().iterateMR(conf, input, priorClustersPath, output, maxIterations);
+      ClusterIterator.iterateMR(conf, input, priorClustersPath, output, maxIterations);
     }
     return output;
   }
@@ -257,7 +256,7 @@ public class KMeansDriver extends Abstra
       log.info("Input: {} Clusters In: {} Out: {} Distance: {}", new Object[] {input, clustersIn, output, measure});
     }
     ClusterClassifier.writePolicy(new KMeansClusteringPolicy(), clustersIn);
-    ClusterClassificationDriver.run(input, output, new Path(output, CLUSTERED_POINTS_DIRECTORY),
+    ClusterClassificationDriver.run(input, output, new Path(output, PathDirectory.CLUSTERED_POINTS_DIRECTORY),
         clusterClassificationThreshold, true, runSequential);
   }
   

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/Kluster.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/Kluster.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/Kluster.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/Kluster.java Wed Jun 20 12:07:50 2012
@@ -30,7 +30,8 @@ public class Kluster extends DistanceMea
   private boolean converged;
   
   /** For (de)serialization as a Writable */
-  public Kluster() {}
+  public Kluster() {
+  }
   
   /**
    * Construct a new cluster with the given point as its center