You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by sr...@apache.org on 2010/11/09 14:19:28 UTC

svn commit: r1032979 [1/2] - in /mahout/trunk/core/src/main/java/org/apache/mahout: cf/taste/impl/common/ cf/taste/impl/model/ classifier/ classifier/bayes/ classifier/bayes/algorithm/ classifier/bayes/interfaces/ classifier/naivebayes/ classifier/naiv...

Author: srowen
Date: Tue Nov  9 13:19:26 2010
New Revision: 1032979

URL: http://svn.apache.org/viewvc?rev=1032979&view=rev
Log:
Style changes, mostly weakening types

Modified:
    mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/Cache.java
    mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FastByIDMap.java
    mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FastMap.java
    mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/GenericDataModel.java
    mahout/trunk/core/src/main/java/org/apache/mahout/classifier/ResultAnalyzer.java
    mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/TestClassifier.java
    mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/algorithm/BayesAlgorithm.java
    mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/algorithm/CBayesAlgorithm.java
    mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/interfaces/Algorithm.java
    mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/AbstractNaiveBayesClassifier.java
    mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/ComplementaryNaiveBayesClassifier.java
    mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/NaiveBayesModel.java
    mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/StandardNaiveBayesClassifier.java
    mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/trainer/NaiveBayesInstanceMapper.java
    mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/trainer/NaiveBayesThetaComplementaryMapper.java
    mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/trainer/NaiveBayesThetaMapper.java
    mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/trainer/NaiveBayesTrainer.java
    mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/trainer/NaiveBayesWeightsMapper.java
    mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sgd/ModelDissector.java
    mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sgd/RankingGradient.java
    mahout/trunk/core/src/main/java/org/apache/mahout/clustering/DistanceMeasureCluster.java
    mahout/trunk/core/src/main/java/org/apache/mahout/clustering/VectorModelClassifier.java
    mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/ClusterMapper.java
    mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletClusterMapper.java
    mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletClusterer.java
    mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/DistanceMeasureClusterDistribution.java
    mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansClusterMapper.java
    mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansClusterer.java
    mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansMapper.java
    mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansReducer.java
    mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansUtil.java
    mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansClusterMapper.java
    mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansClusterer.java
    mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansDriver.java
    mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansMapper.java
    mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansReducer.java
    mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansUtil.java
    mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyClusterMapper.java
    mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyMapper.java
    mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyReducer.java
    mahout/trunk/core/src/main/java/org/apache/mahout/clustering/minhash/HashFactory.java
    mahout/trunk/core/src/main/java/org/apache/mahout/common/IntTuple.java
    mahout/trunk/core/src/main/java/org/apache/mahout/df/builder/DefaultTreeBuilder.java
    mahout/trunk/core/src/main/java/org/apache/mahout/df/data/Data.java
    mahout/trunk/core/src/main/java/org/apache/mahout/df/mapreduce/Classifier.java
    mahout/trunk/core/src/main/java/org/apache/mahout/df/tools/Describe.java
    mahout/trunk/core/src/main/java/org/apache/mahout/df/tools/FrequenciesJob.java
    mahout/trunk/core/src/main/java/org/apache/mahout/driver/MahoutDriver.java
    mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/FPGrowthDriver.java
    mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/ParallelFPGrowthMapper.java
    mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/TransactionSortingMapper.java
    mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/TransactionTree.java
    mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/FPGrowth.java
    mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/FPTree.java
    mahout/trunk/core/src/main/java/org/apache/mahout/ga/watchmaker/MahoutEvaluator.java
    mahout/trunk/core/src/main/java/org/apache/mahout/ga/watchmaker/OutputUtils.java
    mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/TimesSquaredJob.java
    mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/decomposer/DistributedLanczosSolver.java
    mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/decomposer/EigenVerificationJob.java
    mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/SparseVectorsFromSequenceFiles.java
    mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/TF.java
    mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/collocations/llr/CollocDriver.java
    mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/AdaptiveWordValueEncoder.java
    mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/CachingContinuousValueEncoder.java
    mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/CachingStaticWordValueEncoder.java
    mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/CachingValueEncoder.java
    mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/Dictionary.java
    mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/TextValueEncoder.java

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/Cache.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/Cache.java?rev=1032979&r1=1032978&r2=1032979&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/Cache.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/Cache.java Tue Nov  9 13:19:26 2010
@@ -122,8 +122,11 @@ public final class Cache<K,V> implements
   
   private V getAndCacheValue(K key) throws TasteException {
     V value = retriever.get(key);
+    if (value == null) {
+      value = (V) NULL;
+    }
     synchronized (cache) {
-      cache.put(key, value == null ? (V) NULL : value);
+      cache.put(key, value);
     }
     return value;
   }

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FastByIDMap.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FastByIDMap.java?rev=1032979&r1=1032978&r2=1032979&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FastByIDMap.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FastByIDMap.java Tue Nov  9 13:19:26 2010
@@ -47,7 +47,7 @@ public final class FastByIDMap<V> implem
   private V[] values;
   private int numEntries;
   private int numSlotsUsed;
-  private int maxSize;
+  private final int maxSize;
   private BitSet recentlyAccessed;
   private final boolean countingAccesses;
   

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FastMap.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FastMap.java?rev=1032979&r1=1032978&r2=1032979&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FastMap.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FastMap.java Tue Nov  9 13:19:26 2010
@@ -63,7 +63,7 @@ public final class FastMap<K,V> implemen
   private V[] values;
   private int numEntries;
   private int numSlotsUsed;
-  private int maxSize;
+  private final int maxSize;
   private BitSet recentlyAccessed;
   private final boolean countingAccesses;
   

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/GenericDataModel.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/GenericDataModel.java?rev=1032979&r1=1032978&r2=1032979&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/GenericDataModel.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/GenericDataModel.java Tue Nov  9 13:19:26 2010
@@ -92,7 +92,7 @@ public final class GenericDataModel exte
       for (Preference preference : prefs) {
         long itemID = preference.getItemID();
         itemIDSet.add(itemID);
-        List<Preference> prefsForItem = (List<Preference>) prefsForItems.get(itemID);
+        Collection<Preference> prefsForItem = prefsForItems.get(itemID);
         if (prefsForItem == null) {
           prefsForItem = new ArrayList<Preference>(2);
           prefsForItems.put(itemID, prefsForItem);

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/ResultAnalyzer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/ResultAnalyzer.java?rev=1032979&r1=1032978&r2=1032979&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/ResultAnalyzer.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/ResultAnalyzer.java Tue Nov  9 13:19:26 2010
@@ -18,6 +18,7 @@
 package org.apache.mahout.classifier;
 
 import java.text.DecimalFormat;
+import java.text.NumberFormat;
 import java.util.Collection;
 
 import org.apache.commons.lang.StringUtils;
@@ -84,7 +85,7 @@ public class ResultAnalyzer implements S
     int totalClassified = correctlyClassified + incorrectlyClassified;
     double percentageCorrect = (double) 100 * correctlyClassified / totalClassified;
     double percentageIncorrect = (double) 100 * incorrectlyClassified / totalClassified;
-    DecimalFormat decimalFormatter = new DecimalFormat("0.####");
+    NumberFormat decimalFormatter = new DecimalFormat("0.####");
     
     returnString.append(StringUtils.rightPad("Correctly Classified Instances", 40)).append(": ").append(
       StringUtils.leftPad(Integer.toString(correctlyClassified), 10)).append('\t').append(

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/TestClassifier.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/TestClassifier.java?rev=1032979&r1=1032978&r2=1032979&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/TestClassifier.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/TestClassifier.java Tue Nov  9 13:19:26 2010
@@ -130,12 +130,6 @@ public final class TestClassifier {
       BayesParameters params = new BayesParameters();
       // Setting all default values
       int gramSize = 1;
-      String classifierType = "bayes";      
-      String dataSource = "hdfs";
-      String defaultCat = "unknown";
-      String encoding = "UTF-8";
-      String alphaI = "1.0";
-      String classificationMethod = "sequential";
 
       String modelBasePath = (String) cmdLine.getValue(pathOpt);
       
@@ -143,23 +137,28 @@ public final class TestClassifier {
         gramSize = Integer.parseInt((String) cmdLine.getValue(gramSizeOpt));
         
       }
-      
+
+      String classifierType = "bayes";
       if (cmdLine.hasOption(classifierType)) {
         classifierType = (String) cmdLine.getValue(typeOpt);
       }
-      
+
+      String dataSource = "hdfs";
       if (cmdLine.hasOption(dataSource)) {
         dataSource = (String) cmdLine.getValue(dataSource);
       }
-      
+
+      String defaultCat = "unknown";
       if (cmdLine.hasOption(defaultCatOpt)) {
         defaultCat = (String) cmdLine.getValue(defaultCatOpt);
       }
-      
+
+      String encoding = "UTF-8";
       if (cmdLine.hasOption(encodingOpt)) {
         encoding = (String) cmdLine.getValue(encodingOpt);
       }
-      
+
+      String alphaI = "1.0";
       if (cmdLine.hasOption(alphaOpt)) {
         alphaI = (String) cmdLine.getValue(alphaOpt);
       }
@@ -167,7 +166,8 @@ public final class TestClassifier {
       boolean verbose = cmdLine.hasOption(verboseOutputOpt);
       
       String testDirPath = (String) cmdLine.getValue(dirOpt);
-      
+
+      String classificationMethod = "sequential";
       if (cmdLine.hasOption(methodOpt)) {
         classificationMethod = (String) cmdLine.getValue(methodOpt);
       }

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/algorithm/BayesAlgorithm.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/algorithm/BayesAlgorithm.java?rev=1032979&r1=1032978&r2=1032979&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/algorithm/BayesAlgorithm.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/algorithm/BayesAlgorithm.java Tue Nov  9 13:19:26 2010
@@ -108,7 +108,7 @@ public class BayesAlgorithm implements A
   @Override
   public double documentWeight(final Datastore datastore,
                                final String label,
-                               String[] document) throws InvalidDatastoreException {
+                               String[] document) {
     OpenObjectIntHashMap<String> wordList = new OpenObjectIntHashMap<String>(document.length / 2);
     for (String word : document) {
       if (wordList.containsKey(word)) {

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/algorithm/CBayesAlgorithm.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/algorithm/CBayesAlgorithm.java?rev=1032979&r1=1032978&r2=1032979&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/algorithm/CBayesAlgorithm.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/algorithm/CBayesAlgorithm.java Tue Nov  9 13:19:26 2010
@@ -117,7 +117,7 @@ public class CBayesAlgorithm implements 
   @Override
   public double documentWeight(final Datastore datastore,
                                final String label,
-                               String[] document) throws InvalidDatastoreException {
+                               String[] document) {
     OpenObjectIntHashMap<String> wordList = new OpenObjectIntHashMap<String>(document.length / 2);
     for (String word : document) {
       if (wordList.containsKey(word)) {

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/interfaces/Algorithm.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/interfaces/Algorithm.java?rev=1032979&r1=1032978&r2=1032979&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/interfaces/Algorithm.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/interfaces/Algorithm.java Tue Nov  9 13:19:26 2010
@@ -96,10 +96,9 @@ public interface Algorithm {
    * @param document
    *          The document
    * @return The probability
-   * @throws InvalidDatastoreException
    * @see Algorithm#featureWeight(Datastore, String, String)
    */
-  double documentWeight(Datastore datastore, String label, String[] document) throws InvalidDatastoreException;
+  double documentWeight(Datastore datastore, String label, String[] document);
   
   /**
    * Returns the labels in the given Model

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/AbstractNaiveBayesClassifier.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/AbstractNaiveBayesClassifier.java?rev=1032979&r1=1032978&r2=1032979&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/AbstractNaiveBayesClassifier.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/AbstractNaiveBayesClassifier.java Tue Nov  9 13:19:26 2010
@@ -28,11 +28,15 @@ import org.apache.mahout.math.Vector.Ele
  * 
  */
 public abstract class AbstractNaiveBayesClassifier extends AbstractVectorClassifier { 
-  protected NaiveBayesModel model;
+  private final NaiveBayesModel model;
   
-  public AbstractNaiveBayesClassifier(NaiveBayesModel model) {
+  protected AbstractNaiveBayesClassifier(NaiveBayesModel model) {
     this.model = model;
   }
+
+  protected NaiveBayesModel getModel() {
+    return model;
+  }
   
   public abstract double getScoreForLabelFeature(int label, int feature);
   

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/ComplementaryNaiveBayesClassifier.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/ComplementaryNaiveBayesClassifier.java?rev=1032979&r1=1032978&r2=1032979&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/ComplementaryNaiveBayesClassifier.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/ComplementaryNaiveBayesClassifier.java Tue Nov  9 13:19:26 2010
@@ -30,6 +30,7 @@ public class ComplementaryNaiveBayesClas
 
   @Override
   public double getScoreForLabelFeature(int label, int feature) {
+    NaiveBayesModel model = getModel();
     double result = model.getWeightMatrix().get(label, feature);
     double vocabCount = model.getVocabCount();
     double featureSum = model.getFeatureSum().get(feature);

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/NaiveBayesModel.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/NaiveBayesModel.java?rev=1032979&r1=1032978&r2=1032979&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/NaiveBayesModel.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/NaiveBayesModel.java Tue Nov  9 13:19:26 2010
@@ -26,6 +26,7 @@ import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.io.IntWritable;
 import org.apache.hadoop.io.SequenceFile;
 import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.Writable;
 import org.apache.mahout.classifier.naivebayes.trainer.NaiveBayesTrainer;
 import org.apache.mahout.math.JsonMatrixAdapter;
 import org.apache.mahout.math.JsonVectorAdapter;
@@ -155,7 +156,7 @@ public class NaiveBayesModel implements 
     
     FileSystem fs = sumVectorPath.getFileSystem(conf);
     SequenceFile.Reader reader = new SequenceFile.Reader(fs, sumVectorPath, conf);
-    Text key = new Text();
+    Writable key = new Text();
     VectorWritable value = new VectorWritable();
 
     int featureCount = 0;

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/StandardNaiveBayesClassifier.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/StandardNaiveBayesClassifier.java?rev=1032979&r1=1032978&r2=1032979&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/StandardNaiveBayesClassifier.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/StandardNaiveBayesClassifier.java Tue Nov  9 13:19:26 2010
@@ -30,6 +30,7 @@ public class StandardNaiveBayesClassifie
 
   @Override
   public double getScoreForLabelFeature(int label, int feature) {
+    NaiveBayesModel model = getModel();
     double result = model.getWeightMatrix().get(label, feature);
     double vocabCount = model.getVocabCount();
     double sumLabelWeight = model.getLabelSum().get(label);

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/trainer/NaiveBayesInstanceMapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/trainer/NaiveBayesInstanceMapper.java?rev=1032979&r1=1032978&r2=1032979&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/trainer/NaiveBayesInstanceMapper.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/trainer/NaiveBayesInstanceMapper.java Tue Nov  9 13:19:26 2010
@@ -34,7 +34,7 @@ import org.apache.mahout.math.map.OpenOb
 
 public class NaiveBayesInstanceMapper extends Mapper<Text, VectorWritable, IntWritable, VectorWritable> {
   
-  private OpenObjectIntHashMap<String> labelMap = new OpenObjectIntHashMap<String>();
+  private final OpenObjectIntHashMap<String> labelMap = new OpenObjectIntHashMap<String>();
   
   @Override
   protected void map(Text key, VectorWritable value, Context context)

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/trainer/NaiveBayesThetaComplementaryMapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/trainer/NaiveBayesThetaComplementaryMapper.java?rev=1032979&r1=1032978&r2=1032979&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/trainer/NaiveBayesThetaComplementaryMapper.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/trainer/NaiveBayesThetaComplementaryMapper.java Tue Nov  9 13:19:26 2010
@@ -28,6 +28,7 @@ import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.io.IntWritable;
 import org.apache.hadoop.io.SequenceFile;
 import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.Writable;
 import org.apache.hadoop.mapreduce.Mapper;
 import org.apache.mahout.classifier.naivebayes.BayesConstants;
 import org.apache.mahout.math.Vector;
@@ -37,7 +38,7 @@ import org.apache.mahout.math.map.OpenOb
 
 public class NaiveBayesThetaComplementaryMapper extends Mapper<IntWritable, VectorWritable, Text, VectorWritable> {
   
-  private OpenObjectIntHashMap<String> labelMap = new OpenObjectIntHashMap<String>();
+  private final OpenObjectIntHashMap<String> labelMap = new OpenObjectIntHashMap<String>();
   private Vector featureSum;
   private Vector labelSum;
   private Vector perLabelThetaNormalizer;
@@ -74,7 +75,7 @@ public class NaiveBayesThetaComplementar
       Path weightFile = new Path(localFiles[0].getPath());
       FileSystem fs = weightFile.getFileSystem(conf);
       SequenceFile.Reader reader = new SequenceFile.Reader(fs, weightFile, conf);
-      Text key = new Text();
+      Writable key = new Text();
       VectorWritable value = new VectorWritable();
 
       while (reader.next(key, value)) {

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/trainer/NaiveBayesThetaMapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/trainer/NaiveBayesThetaMapper.java?rev=1032979&r1=1032978&r2=1032979&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/trainer/NaiveBayesThetaMapper.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/trainer/NaiveBayesThetaMapper.java Tue Nov  9 13:19:26 2010
@@ -27,6 +27,7 @@ import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.io.IntWritable;
 import org.apache.hadoop.io.SequenceFile;
 import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.Writable;
 import org.apache.hadoop.mapreduce.Mapper;
 import org.apache.mahout.classifier.naivebayes.BayesConstants;
 import org.apache.mahout.math.Vector;
@@ -35,7 +36,7 @@ import org.apache.mahout.math.map.OpenOb
 
 public class NaiveBayesThetaMapper extends Mapper<IntWritable, VectorWritable, Text, VectorWritable> {
   
-  private OpenObjectIntHashMap<String> labelMap = new OpenObjectIntHashMap<String>();
+  private final OpenObjectIntHashMap<String> labelMap = new OpenObjectIntHashMap<String>();
   private Vector featureSum;
   private Vector labelSum;
   private Vector perLabelThetaNormalizer;
@@ -64,7 +65,7 @@ public class NaiveBayesThetaMapper exten
       Path weightFile = new Path(localFiles[0].getPath());
       FileSystem fs = weightFile.getFileSystem(conf);
       SequenceFile.Reader reader = new SequenceFile.Reader(fs, weightFile, conf);
-      Text key = new Text();
+      Writable key = new Text();
       VectorWritable value = new VectorWritable();
 
       while (reader.next(key, value)) {

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/trainer/NaiveBayesTrainer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/trainer/NaiveBayesTrainer.java?rev=1032979&r1=1032978&r2=1032979&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/trainer/NaiveBayesTrainer.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/trainer/NaiveBayesTrainer.java Tue Nov  9 13:19:26 2010
@@ -19,7 +19,6 @@ package org.apache.mahout.classifier.nai
 
 import java.io.IOException;
 import java.net.URI;
-import java.util.List;
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.filecache.DistributedCache;
@@ -50,14 +49,17 @@ public final class NaiveBayesTrainer {
   public static final String LABEL_MAP = "labelMap";
   public static final String ALPHA_I = "alphaI";
 
+  private NaiveBayesTrainer() {
+  }
+
   public static void trainNaiveBayes(Path input,
                                       Configuration conf,
-                                      List<String> inputLabels,
+                                      Iterable<String> inputLabels,
                                       Path output,
                                       int numReducers,
                                       float alphaI,
                                       boolean trainComplementary)
-      throws IOException, InterruptedException, ClassNotFoundException {
+    throws IOException, InterruptedException, ClassNotFoundException {
     conf.setFloat(ALPHA_I, alphaI);
     Path labelMapPath = createLabelMapFile(inputLabels, conf, new Path(output, LABEL_MAP));
     Path classVectorPath =  new Path(output, CLASS_VECTORS);
@@ -72,7 +74,9 @@ public final class NaiveBayesTrainer {
     }
   }
 
-  private static void runNaiveBayesByLabelSummer(Path input, Configuration conf, Path labelMapPath,
+  private static void runNaiveBayesByLabelSummer(Path input,
+                                                 Configuration conf,
+                                                 Path labelMapPath,
                                                  Path output, int numReducers)
     throws IOException, InterruptedException, ClassNotFoundException {
     
@@ -99,8 +103,11 @@ public final class NaiveBayesTrainer {
     job.waitForCompletion(true);
   }
 
-  private static void runNaiveBayesWeightSummer(Path input, Configuration conf,
-                                                Path labelMapPath, Path output, int numReducers)
+  private static void runNaiveBayesWeightSummer(Path input,
+                                                Configuration conf,
+                                                Path labelMapPath,
+                                                Path output,
+                                                int numReducers)
     throws IOException, InterruptedException, ClassNotFoundException {
     
     // this conf parameter needs to be set enable serialisation of conf values
@@ -124,9 +131,12 @@ public final class NaiveBayesTrainer {
     job.waitForCompletion(true);
   }
   
-  private static void runNaiveBayesThetaSummer(Path input, Configuration conf,
-                                               Path weightFilePath, Path output, int numReducers)
-      throws IOException, InterruptedException, ClassNotFoundException {
+  private static void runNaiveBayesThetaSummer(Path input,
+                                               Configuration conf,
+                                               Path weightFilePath,
+                                               Path output,
+                                               int numReducers)
+    throws IOException, InterruptedException, ClassNotFoundException {
     
     // this conf parameter needs to be set enable serialisation of conf values
     conf.set("io.serializations", "org.apache.hadoop.io.serializer.JavaSerialization,"
@@ -150,9 +160,12 @@ public final class NaiveBayesTrainer {
     job.waitForCompletion(true);
   }
 
-  private static void runNaiveBayesThetaComplementarySummer(Path input, Configuration conf,
-                                                            Path weightFilePath, Path output, int numReducers)
-      throws IOException, InterruptedException, ClassNotFoundException {
+  private static void runNaiveBayesThetaComplementarySummer(Path input,
+                                                            Configuration conf,
+                                                            Path weightFilePath,
+                                                            Path output,
+                                                            int numReducers)
+    throws IOException, InterruptedException, ClassNotFoundException {
     
     // this conf parameter needs to be set enable serialisation of conf values
     conf.set("io.serializations", "org.apache.hadoop.io.serializer.JavaSerialization,"
@@ -180,14 +193,10 @@ public final class NaiveBayesTrainer {
   
   /**
    * Write the list of labels into a map file
-   * 
-   * @param wordCountPath
-   * @param dictionaryPathBase
-   * @throws IOException
    */
-  public static Path createLabelMapFile(List<String> labels,
-                                         Configuration conf,
-                                         Path labelMapPathBase) throws IOException {
+  public static Path createLabelMapFile(Iterable<String> labels,
+                                        Configuration conf,
+                                        Path labelMapPathBase) throws IOException {
     FileSystem fs = FileSystem.get(labelMapPathBase.toUri(), conf);
     Path labelMapPath = new Path(labelMapPathBase, LABEL_MAP);
     

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/trainer/NaiveBayesWeightsMapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/trainer/NaiveBayesWeightsMapper.java?rev=1032979&r1=1032978&r2=1032979&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/trainer/NaiveBayesWeightsMapper.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/trainer/NaiveBayesWeightsMapper.java Tue Nov  9 13:19:26 2010
@@ -37,9 +37,9 @@ import org.apache.mahout.math.map.OpenOb
 
 public class NaiveBayesWeightsMapper extends Mapper<IntWritable, VectorWritable, Text, VectorWritable> {
   
-  private OpenObjectIntHashMap<String> labelMap = new OpenObjectIntHashMap<String>();
-  Vector featureSum;
-  Vector labelSum;
+  private final OpenObjectIntHashMap<String> labelMap = new OpenObjectIntHashMap<String>();
+  private Vector featureSum;
+  private Vector labelSum;
  
   @Override
   protected void map(IntWritable key, VectorWritable value, Context context)

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sgd/ModelDissector.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sgd/ModelDissector.java?rev=1032979&r1=1032978&r2=1032979&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sgd/ModelDissector.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sgd/ModelDissector.java Tue Nov  9 13:19:26 2010
@@ -22,10 +22,7 @@ import com.google.common.collect.Maps;
 import com.google.common.collect.Ordering;
 import org.apache.mahout.classifier.AbstractVectorClassifier;
 import org.apache.mahout.math.Vector;
-import org.apache.mahout.math.function.BinaryFunction;
-import org.apache.mahout.math.function.Functions;
 
-import java.util.ArrayList;
 import java.util.Collections;
 import java.util.List;
 import java.util.Map;
@@ -119,10 +116,10 @@ public class ModelDissector {
   }
 
   private static class Category implements Comparable<Category> {
-    int index;
-    double weight;
+    private final int index;
+    private final double weight;
 
-    public Category(int index, double weight) {
+    private Category(int index, double weight) {
       this.index = index;
       this.weight = weight;
     }
@@ -130,10 +127,15 @@ public class ModelDissector {
     @Override
     public int compareTo(Category o) {
       int r = Double.compare(Math.abs(weight), Math.abs(o.weight));
-      if (r != 0) {
-        return r;
+      if (r == 0) {
+        if (index < o.index) {
+          return -1;
+        } else if (index > o.index) {
+          return 1;
+        }
+        return 0;
       } else {
-        return index - o.index;
+        return r;
       }
     }
   }
@@ -142,7 +144,7 @@ public class ModelDissector {
     private final String feature;
     private final double value;
     private final int maxIndex;
-    private List<Category> categories;
+    private final List<Category> categories;
 
     public Weight(String feature, Vector weights) {
       this(feature, weights, 3);
@@ -151,7 +153,7 @@ public class ModelDissector {
     public Weight(String feature, Vector weights, int n) {
       this.feature = feature;
       // pick out the weight with the largest abs value, but don't forget the sign
-      PriorityQueue<Category> biggest = new PriorityQueue<Category>(n + 1, Ordering.natural().reverse());
+      Queue<Category> biggest = new PriorityQueue<Category>(n + 1, Ordering.natural().reverse());
       for (Vector.Element element : weights) {
         biggest.add(new Category(element.index(), element.get()));
         while (biggest.size() > n) {

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sgd/RankingGradient.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sgd/RankingGradient.java?rev=1032979&r1=1032978&r2=1032979&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sgd/RankingGradient.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sgd/RankingGradient.java Tue Nov  9 13:19:26 2010
@@ -17,7 +17,6 @@
 
 package org.apache.mahout.classifier.sgd;
 
-import com.google.common.collect.Collections2;
 import com.google.common.collect.Lists;
 import org.apache.mahout.classifier.AbstractVectorClassifier;
 import org.apache.mahout.math.Vector;

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/DistanceMeasureCluster.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/DistanceMeasureCluster.java?rev=1032979&r1=1032978&r2=1032979&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/DistanceMeasureCluster.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/DistanceMeasureCluster.java Tue Nov  9 13:19:26 2010
@@ -27,7 +27,7 @@ import org.apache.mahout.math.VectorWrit
 
 public class DistanceMeasureCluster extends AbstractCluster {
 
-  protected DistanceMeasure measure;
+  private DistanceMeasure measure;
 
   public DistanceMeasureCluster(Vector point, int id, DistanceMeasure measure) {
     super(point, id);

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/VectorModelClassifier.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/VectorModelClassifier.java?rev=1032979&r1=1032978&r2=1032979&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/VectorModelClassifier.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/VectorModelClassifier.java Tue Nov  9 13:19:26 2010
@@ -16,6 +16,7 @@
 package org.apache.mahout.clustering;
 
 import java.util.ArrayList;
+import java.util.Collection;
 import java.util.List;
 
 import org.apache.mahout.classifier.AbstractVectorClassifier;
@@ -43,7 +44,7 @@ public class VectorModelClassifier exten
   public Vector classify(Vector instance) {
     Vector pdfs = new DenseVector(models.size());
     if (models.get(0) instanceof SoftCluster) {
-      List<SoftCluster> clusters = new ArrayList<SoftCluster>();
+      Collection<SoftCluster> clusters = new ArrayList<SoftCluster>();
       List<Double> distances = new ArrayList<Double>();
       for (Model<VectorWritable> model : models) {
         SoftCluster sc = (SoftCluster) model;

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/ClusterMapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/ClusterMapper.java?rev=1032979&r1=1032978&r2=1032979&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/ClusterMapper.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/ClusterMapper.java Tue Nov  9 13:19:26 2010
@@ -20,7 +20,6 @@ package org.apache.mahout.clustering.can
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Collection;
-import java.util.List;
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileStatus;
@@ -31,6 +30,7 @@ import org.apache.hadoop.fs.PathFilter;
 import org.apache.hadoop.io.IntWritable;
 import org.apache.hadoop.io.SequenceFile;
 import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.Writable;
 import org.apache.hadoop.io.WritableComparable;
 import org.apache.hadoop.mapreduce.Mapper;
 import org.apache.mahout.clustering.WeightedVectorWritable;
@@ -46,7 +46,7 @@ public class ClusterMapper extends Mappe
     canopyClusterer.emitPointToClosestCanopy(point.get(), canopies, context);
   }
 
-  private final List<Canopy> canopies = new ArrayList<Canopy>();
+  private final Collection<Canopy> canopies = new ArrayList<Canopy>();
 
   /**
    * Configure the mapper by providing its canopies. Used by unit tests.
@@ -83,7 +83,7 @@ public class ClusterMapper extends Mappe
         for (FileStatus file : files) {
           SequenceFile.Reader reader = new SequenceFile.Reader(fs, file.getPath(), conf);
           try {
-            Text key = new Text();
+            Writable key = new Text();
             Canopy value = new Canopy();
             while (reader.next(key, value)) {
               canopies.add(value);

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletClusterMapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletClusterMapper.java?rev=1032979&r1=1032978&r2=1032979&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletClusterMapper.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletClusterMapper.java Tue Nov  9 13:19:26 2010
@@ -28,6 +28,7 @@ import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.io.IntWritable;
 import org.apache.hadoop.io.SequenceFile;
 import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.Writable;
 import org.apache.hadoop.io.WritableComparable;
 import org.apache.hadoop.mapreduce.Mapper;
 import org.apache.mahout.clustering.WeightedVectorWritable;
@@ -74,7 +75,7 @@ public class DirichletClusterMapper exte
       for (FileStatus s : status) {
         SequenceFile.Reader reader = new SequenceFile.Reader(fs, s.getPath(), conf);
         try {
-          Text key = new Text();
+          Writable key = new Text();
           DirichletCluster cluster = new DirichletCluster();
           while (reader.next(key, cluster)) {
             clusters.add(cluster);

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletClusterer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletClusterer.java?rev=1032979&r1=1032978&r2=1032979&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletClusterer.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletClusterer.java Tue Nov  9 13:19:26 2010
@@ -257,8 +257,7 @@ public class DirichletClusterer {
     }
     // then pick one cluster by sampling a Multinomial distribution based upon them
     // see: http://en.wikipedia.org/wiki/Multinomial_distribution
-    int k = UncommonDistributions.rMultinom(pi);
-    return k;
+    return UncommonDistributions.rMultinom(pi);
   }
 
   protected void updateModels(Cluster[] newModels) {

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/DistanceMeasureClusterDistribution.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/DistanceMeasureClusterDistribution.java?rev=1032979&r1=1032978&r2=1032979&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/DistanceMeasureClusterDistribution.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/DistanceMeasureClusterDistribution.java Tue Nov  9 13:19:26 2010
@@ -30,7 +30,7 @@ import org.apache.mahout.math.VectorWrit
  */
 public class DistanceMeasureClusterDistribution extends AbstractVectorModelDistribution {
 
-  DistanceMeasure measure;
+  private DistanceMeasure measure;
 
   public DistanceMeasureClusterDistribution() {
   }

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansClusterMapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansClusterMapper.java?rev=1032979&r1=1032978&r2=1032979&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansClusterMapper.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansClusterMapper.java Tue Nov  9 13:19:26 2010
@@ -19,6 +19,7 @@ package org.apache.mahout.clustering.fuz
 
 import java.io.IOException;
 import java.util.ArrayList;
+import java.util.Collection;
 import java.util.List;
 
 import org.apache.hadoop.conf.Configuration;
@@ -62,7 +63,7 @@ public class FuzzyKMeansClusterMapper ex
    * @param clusters
    *          a List<Cluster>
    */
-  void setup(List<SoftCluster> clusters, Configuration conf) {
+  void setup(Collection<SoftCluster> clusters, Configuration conf) {
     this.clusters.clear();
     this.clusters.addAll(clusters);
     this.clusterer = new FuzzyKMeansClusterer(conf);

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansClusterer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansClusterer.java?rev=1032979&r1=1032978&r2=1032979&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansClusterer.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansClusterer.java Tue Nov  9 13:19:26 2010
@@ -234,7 +234,7 @@ public class FuzzyKMeansClusterer {
     }
   }
 
-  public Vector computePi(List<SoftCluster> clusters, List<Double> clusterDistanceList) {
+  public Vector computePi(Collection<SoftCluster> clusters, List<Double> clusterDistanceList) {
     Vector pi = new DenseVector(clusters.size());
     for (int i = 0; i < clusters.size(); i++) {
       double probWeight = computeProbWeight(clusterDistanceList.get(i), clusterDistanceList);

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansMapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansMapper.java?rev=1032979&r1=1032978&r2=1032979&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansMapper.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansMapper.java Tue Nov  9 13:19:26 2010
@@ -19,6 +19,7 @@ package org.apache.mahout.clustering.fuz
 
 import java.io.IOException;
 import java.util.ArrayList;
+import java.util.Collection;
 import java.util.List;
 
 import org.apache.hadoop.conf.Configuration;
@@ -67,7 +68,7 @@ public class FuzzyKMeansMapper extends M
    * @param clusters
    *          a List<Cluster>
    */
-  void config(List<SoftCluster> clusters) {
+  void config(Collection<SoftCluster> clusters) {
     this.clusters.clear();
     this.clusters.addAll(clusters);
   }  

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansReducer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansReducer.java?rev=1032979&r1=1032978&r2=1032979&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansReducer.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansReducer.java Tue Nov  9 13:19:26 2010
@@ -19,8 +19,8 @@ package org.apache.mahout.clustering.fuz
 
 import java.io.IOException;
 import java.util.ArrayList;
+import java.util.Collection;
 import java.util.HashMap;
-import java.util.List;
 import java.util.Map;
 
 import org.apache.hadoop.conf.Configuration;
@@ -59,7 +59,7 @@ public class FuzzyKMeansReducer extends 
     Configuration conf = context.getConfiguration();
     clusterer = new FuzzyKMeansClusterer(conf);
 
-    List<SoftCluster> clusters = new ArrayList<SoftCluster>();
+    Collection<SoftCluster> clusters = new ArrayList<SoftCluster>();
     String clusterPath = conf.get(FuzzyKMeansConfigKeys.CLUSTER_PATH_KEY);
     if ((clusterPath != null) && (clusterPath.length() > 0)) {
       FuzzyKMeansUtil.configureWithClusterInfo(new Path(clusterPath), clusters);
@@ -71,7 +71,7 @@ public class FuzzyKMeansReducer extends 
     }
   }
 
-  private void setClusterMap(List<SoftCluster> clusters) {
+  private void setClusterMap(Collection<SoftCluster> clusters) {
     clusterMap.clear();
     for (SoftCluster cluster : clusters) {
       clusterMap.put(cluster.getIdentifier(), cluster);
@@ -79,7 +79,7 @@ public class FuzzyKMeansReducer extends 
     clusters.clear();
   }
 
-  public void setup(List<SoftCluster> clusters, Configuration conf) {
+  public void setup(Collection<SoftCluster> clusters, Configuration conf) {
     setClusterMap(clusters);
     clusterer = new FuzzyKMeansClusterer(conf);
   }

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansUtil.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansUtil.java?rev=1032979&r1=1032978&r2=1032979&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansUtil.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansUtil.java Tue Nov  9 13:19:26 2010
@@ -19,7 +19,7 @@ package org.apache.mahout.clustering.fuz
 
 import java.io.IOException;
 import java.util.ArrayList;
-import java.util.List;
+import java.util.Collection;
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileStatus;
@@ -41,11 +41,11 @@ final class FuzzyKMeansUtil {
   }
 
   /** Configure the mapper with the cluster info */
-  public static void configureWithClusterInfo(Path clusterPathStr, List<SoftCluster> clusters) {
+  public static void configureWithClusterInfo(Path clusterPathStr, Collection<SoftCluster> clusters) {
     // Get the path location where the cluster Info is stored
     Configuration job = new Configuration();
     Path clusterPath = new Path(clusterPathStr, "*");
-    List<Path> result = new ArrayList<Path>();
+    Collection<Path> result = new ArrayList<Path>();
     // filter out the files
     PathFilter clusterFileFilter = new PathFilter() {
       @Override

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansClusterMapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansClusterMapper.java?rev=1032979&r1=1032978&r2=1032979&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansClusterMapper.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansClusterMapper.java Tue Nov  9 13:19:26 2010
@@ -19,7 +19,7 @@ package org.apache.mahout.clustering.kme
 
 import java.io.IOException;
 import java.util.ArrayList;
-import java.util.List;
+import java.util.Collection;
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.Path;
@@ -32,7 +32,7 @@ import org.apache.mahout.math.VectorWrit
 
 public class KMeansClusterMapper extends Mapper<WritableComparable<?>,VectorWritable,IntWritable,WeightedVectorWritable> {
   
-  private final List<Cluster> clusters = new ArrayList<Cluster>();
+  private final Collection<Cluster> clusters = new ArrayList<Cluster>();
   private KMeansClusterer clusterer;
 
   @Override

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansClusterer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansClusterer.java?rev=1032979&r1=1032978&r2=1032979&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansClusterer.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansClusterer.java Tue Nov  9 13:19:26 2010
@@ -156,7 +156,7 @@ public class KMeansClusterer {
    *          a List<Cluster> to test.
    */
   protected void emitPointToNearestCluster(Vector point, Iterable<Cluster> clusters, Writer writer)
-      throws IOException, InterruptedException {
+    throws IOException {
     AbstractCluster nearestCluster = null;
     double nearestDistance = Double.MAX_VALUE;
     for (AbstractCluster cluster : clusters) {

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansDriver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansDriver.java?rev=1032979&r1=1032978&r2=1032979&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansDriver.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansDriver.java Tue Nov  9 13:19:26 2010
@@ -18,6 +18,7 @@ package org.apache.mahout.clustering.kme
 
 import java.io.IOException;
 import java.util.ArrayList;
+import java.util.Collection;
 import java.util.List;
 
 import org.apache.hadoop.conf.Configuration;
@@ -62,7 +63,8 @@ public class KMeansDriver extends Abstra
     addOption(DefaultOptionCreator.distanceMeasureOption().create());
     addOption(DefaultOptionCreator.clustersInOption()
         .withDescription("The input centroids, as Vectors.  Must be a SequenceFile of Writable, Cluster/Canopy.  "
-            + "If k is also specified, then a random set of vectors will be selected" + " and written out to this path first")
+            + "If k is also specified, then a random set of vectors will be selected"
+            + " and written out to this path first")
         .create());
     addOption(DefaultOptionCreator.numClustersOption()
         .withDescription("The k in k-Means.  If specified, then a random selection of k Vectors will be chosen"
@@ -97,7 +99,8 @@ public class KMeansDriver extends Abstra
           .parseInt(getOption(DefaultOptionCreator.NUM_CLUSTERS_OPTION)), measure);
     }
     boolean runClustering = hasOption(DefaultOptionCreator.CLUSTERING_OPTION);
-    boolean runSequential = getOption(DefaultOptionCreator.METHOD_OPTION).equalsIgnoreCase(DefaultOptionCreator.SEQUENTIAL_METHOD);
+    boolean runSequential = getOption(DefaultOptionCreator.METHOD_OPTION).equalsIgnoreCase(
+        DefaultOptionCreator.SEQUENTIAL_METHOD);
     run(getConf(), input, clusters, output, measure, convergenceDelta, maxIterations, runClustering, runSequential);
     return 0;
   }
@@ -129,21 +132,27 @@ public class KMeansDriver extends Abstra
                          double convergenceDelta,
                          int maxIterations,
                          boolean runClustering,
-                         boolean runSequential) throws IOException, InterruptedException, ClassNotFoundException,
-      InstantiationException, IllegalAccessException {
+                         boolean runSequential)
+    throws IOException, InterruptedException, ClassNotFoundException, InstantiationException, IllegalAccessException {
 
     // iterate until the clusters converge
     String delta = Double.toString(convergenceDelta);
     if (log.isInfoEnabled()) {
       log.info("Input: {} Clusters In: {} Out: {} Distance: {}", new Object[] { input, clustersIn, output,
           measure.getClass().getName() });
-      log.info("convergence: {} max Iterations: {} num Reduce Tasks: {} Input Vectors: {}", new Object[] { convergenceDelta,
-          maxIterations, VectorWritable.class.getName() });
+      log.info("convergence: {} max Iterations: {} num Reduce Tasks: {} Input Vectors: {}",
+          new Object[] { convergenceDelta, maxIterations, VectorWritable.class.getName() });
     }
     Path clustersOut = buildClusters(conf, input, clustersIn, output, measure, maxIterations, delta, runSequential);
     if (runClustering) {
       log.info("Clustering data");
-      clusterData(conf, input, clustersOut, new Path(output, AbstractCluster.CLUSTERED_POINTS_DIR), measure, delta, runSequential);
+      clusterData(conf,
+          input,
+          clustersOut,
+          new Path(output, AbstractCluster.CLUSTERED_POINTS_DIR),
+          measure,
+          delta,
+          runSequential);
     }
   }
 
@@ -174,9 +183,17 @@ public class KMeansDriver extends Abstra
                          double convergenceDelta,
                          int maxIterations,
                          boolean runClustering,
-                         boolean runSequential) throws IOException, InterruptedException, ClassNotFoundException,
-      InstantiationException, IllegalAccessException {
-    run(new Configuration(), input, clustersIn, output, measure, convergenceDelta, maxIterations, runClustering, runSequential);
+                         boolean runSequential)
+    throws IOException, InterruptedException, ClassNotFoundException, InstantiationException, IllegalAccessException {
+    run(new Configuration(),
+        input,
+        clustersIn,
+        output,
+        measure,
+        convergenceDelta,
+        maxIterations,
+        runClustering,
+        runSequential);
   }
 
   /**
@@ -206,8 +223,8 @@ public class KMeansDriver extends Abstra
                                    DistanceMeasure measure,
                                    int maxIterations,
                                    String delta,
-                                   boolean runSequential) throws IOException, InterruptedException, ClassNotFoundException,
-      InstantiationException, IllegalAccessException {
+                                   boolean runSequential)
+    throws IOException, InterruptedException, ClassNotFoundException, InstantiationException, IllegalAccessException {
     if (runSequential) {
       return buildClustersSeq(input, clustersIn, output, measure, maxIterations, delta);
     } else {
@@ -220,10 +237,11 @@ public class KMeansDriver extends Abstra
                                        Path output,
                                        DistanceMeasure measure,
                                        int maxIterations,
-                                       String delta) throws InstantiationException, IllegalAccessException, IOException {
+                                       String delta)
+    throws InstantiationException, IllegalAccessException, IOException {
 
     KMeansClusterer clusterer = new KMeansClusterer(measure);
-    List<Cluster> clusters = new ArrayList<Cluster>();
+    Collection<Cluster> clusters = new ArrayList<Cluster>();
 
     KMeansUtil.configureWithClusterInfo(clustersIn, clusters);
     if (clusters.isEmpty()) {
@@ -315,7 +333,8 @@ public class KMeansDriver extends Abstra
                                       Path clustersIn,
                                       Path clustersOut,
                                       String measureClass,
-                                      String convergenceDelta) throws IOException, InterruptedException, ClassNotFoundException {
+                                      String convergenceDelta)
+    throws IOException, InterruptedException, ClassNotFoundException {
 
     conf.set(KMeansConfigKeys.CLUSTER_PATH_KEY, clustersIn.toString());
     conf.set(KMeansConfigKeys.DISTANCE_MEASURE_KEY, measureClass);
@@ -408,8 +427,8 @@ public class KMeansDriver extends Abstra
                                  Path output,
                                  DistanceMeasure measure,
                                  String convergenceDelta,
-                                 boolean runSequential) throws IOException, InterruptedException, ClassNotFoundException,
-      InstantiationException, IllegalAccessException {
+                                 boolean runSequential)
+    throws IOException, InterruptedException, ClassNotFoundException, InstantiationException, IllegalAccessException {
 
     if (log.isInfoEnabled()) {
       log.info("Running Clustering");
@@ -423,11 +442,11 @@ public class KMeansDriver extends Abstra
     }
   }
 
-  private static void clusterDataSeq(Path input, Path clustersIn, Path output, DistanceMeasure measure) throws IOException,
-      InterruptedException, InstantiationException, IllegalAccessException {
+  private static void clusterDataSeq(Path input, Path clustersIn, Path output, DistanceMeasure measure)
+    throws IOException, InstantiationException, IllegalAccessException {
 
     KMeansClusterer clusterer = new KMeansClusterer(measure);
-    List<Cluster> clusters = new ArrayList<Cluster>();
+    Collection<Cluster> clusters = new ArrayList<Cluster>();
     KMeansUtil.configureWithClusterInfo(clustersIn, clusters);
     if (clusters.isEmpty()) {
       throw new IllegalStateException("Clusters is empty!");
@@ -463,7 +482,8 @@ public class KMeansDriver extends Abstra
                                     Path clustersIn,
                                     Path output,
                                     DistanceMeasure measure,
-                                    String convergenceDelta) throws IOException, InterruptedException, ClassNotFoundException {
+                                    String convergenceDelta)
+    throws IOException, InterruptedException, ClassNotFoundException {
 
     conf.set(KMeansConfigKeys.CLUSTER_PATH_KEY, clustersIn.toString());
     conf.set(KMeansConfigKeys.DISTANCE_MEASURE_KEY, measure.getClass().getName());

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansMapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansMapper.java?rev=1032979&r1=1032978&r2=1032979&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansMapper.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansMapper.java Tue Nov  9 13:19:26 2010
@@ -19,7 +19,6 @@ package org.apache.mahout.clustering.kme
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Collection;
-import java.util.List;
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.Path;
@@ -34,7 +33,7 @@ public class KMeansMapper extends Mapper
 
   private KMeansClusterer clusterer;
 
-  private final List<Cluster> clusters = new ArrayList<Cluster>();
+  private final Collection<Cluster> clusters = new ArrayList<Cluster>();
 
   @Override
   protected void map(WritableComparable<?> key, VectorWritable point, Context context)

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansReducer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansReducer.java?rev=1032979&r1=1032978&r2=1032979&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansReducer.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansReducer.java Tue Nov  9 13:19:26 2010
@@ -18,8 +18,8 @@ package org.apache.mahout.clustering.kme
 
 import java.io.IOException;
 import java.util.ArrayList;
+import java.util.Collection;
 import java.util.HashMap;
-import java.util.List;
 import java.util.Map;
 
 import org.apache.hadoop.conf.Configuration;
@@ -60,7 +60,7 @@ public class KMeansReducer extends Reduc
 
       String path = conf.get(KMeansConfigKeys.CLUSTER_PATH_KEY);
       if (path.length() > 0) {
-        List<Cluster> clusters = new ArrayList<Cluster>();
+        Collection<Cluster> clusters = new ArrayList<Cluster>();
         KMeansUtil.configureWithClusterInfo(new Path(path), clusters);
         setClusterMap(clusters);
         if (clusterMap.isEmpty()) {
@@ -76,7 +76,7 @@ public class KMeansReducer extends Reduc
     }
   }
 
-  private void setClusterMap(List<Cluster> clusters) {
+  private void setClusterMap(Collection<Cluster> clusters) {
     clusterMap = new HashMap<String, Cluster>();
     for (Cluster cluster : clusters) {
       clusterMap.put(cluster.getIdentifier(), cluster);
@@ -84,7 +84,7 @@ public class KMeansReducer extends Reduc
     clusters.clear();
   }
 
-  public void setup(List<Cluster> clusters, DistanceMeasure measure) {
+  public void setup(Collection<Cluster> clusters, DistanceMeasure measure) {
     setClusterMap(clusters);
     this.clusterer = new KMeansClusterer(measure);
   }

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansUtil.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansUtil.java?rev=1032979&r1=1032978&r2=1032979&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansUtil.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansUtil.java Tue Nov  9 13:19:26 2010
@@ -19,7 +19,7 @@ package org.apache.mahout.clustering.kme
 
 import java.io.IOException;
 import java.util.ArrayList;
-import java.util.List;
+import java.util.Collection;
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileStatus;
@@ -42,12 +42,12 @@ final class KMeansUtil {
   }
 
   /** Configure the mapper with the cluster info */
-  public static void configureWithClusterInfo(Path clusterPathStr, List<Cluster> clusters) {
+  public static void configureWithClusterInfo(Path clusterPathStr, Collection<Cluster> clusters) {
 
     // Get the path location where the cluster Info is stored
     Configuration conf = new Configuration();
     Path clusterPath = new Path(clusterPathStr, "*");
-    List<Path> result = new ArrayList<Path>();
+    Collection<Path> result = new ArrayList<Path>();
 
     // filter out the files
     PathFilter clusterFileFilter = new PathFilter() {

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyClusterMapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyClusterMapper.java?rev=1032979&r1=1032978&r2=1032979&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyClusterMapper.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyClusterMapper.java Tue Nov  9 13:19:26 2010
@@ -28,6 +28,7 @@ import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.io.IntWritable;
 import org.apache.hadoop.io.SequenceFile;
 import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.Writable;
 import org.apache.hadoop.io.WritableComparable;
 import org.apache.hadoop.mapreduce.Mapper;
 import org.apache.mahout.clustering.WeightedVectorWritable;
@@ -75,7 +76,7 @@ public class MeanShiftCanopyClusterMappe
       for (FileStatus s : status) {
         SequenceFile.Reader reader = new SequenceFile.Reader(fs, s.getPath(), conf);
         try {
-          Text key = new Text();
+          Writable key = new Text();
           MeanShiftCanopy canopy = new MeanShiftCanopy();
           while (reader.next(key, canopy)) {
             canopies.add(canopy);

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyMapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyMapper.java?rev=1032979&r1=1032978&r2=1032979&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyMapper.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyMapper.java Tue Nov  9 13:19:26 2010
@@ -19,7 +19,7 @@ package org.apache.mahout.clustering.mea
 
 import java.io.IOException;
 import java.util.ArrayList;
-import java.util.List;
+import java.util.Collection;
 
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.io.WritableComparable;
@@ -27,7 +27,7 @@ import org.apache.hadoop.mapreduce.Mappe
 
 public class MeanShiftCanopyMapper extends Mapper<WritableComparable<?>,MeanShiftCanopy,Text,MeanShiftCanopy> {
   
-  private final List<MeanShiftCanopy> canopies = new ArrayList<MeanShiftCanopy>();
+  private final Collection<MeanShiftCanopy> canopies = new ArrayList<MeanShiftCanopy>();
   
   private MeanShiftCanopyClusterer clusterer;
 

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyReducer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyReducer.java?rev=1032979&r1=1032978&r2=1032979&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyReducer.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyReducer.java Tue Nov  9 13:19:26 2010
@@ -19,7 +19,7 @@ package org.apache.mahout.clustering.mea
 
 import java.io.IOException;
 import java.util.ArrayList;
-import java.util.List;
+import java.util.Collection;
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
@@ -29,7 +29,7 @@ import org.apache.hadoop.mapreduce.Reduc
 
 public class MeanShiftCanopyReducer extends Reducer<Text,MeanShiftCanopy,Text,MeanShiftCanopy> {
   
-  private final List<MeanShiftCanopy> canopies = new ArrayList<MeanShiftCanopy>();
+  private final Collection<MeanShiftCanopy> canopies = new ArrayList<MeanShiftCanopy>();
   private MeanShiftCanopyClusterer clusterer;
   private boolean allConverged = true;
 

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/minhash/HashFactory.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/minhash/HashFactory.java?rev=1032979&r1=1032978&r2=1032979&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/minhash/HashFactory.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/minhash/HashFactory.java Tue Nov  9 13:19:26 2010
@@ -65,7 +65,7 @@ public class HashFactory {
     @Override
     public int hash(byte[] bytes) {
       long hashValue = 31;
-      for (byte byteVal : bytes) {
+      for (long byteVal : bytes) {
         hashValue *= seedA * byteVal;
         hashValue += seedB;
       }
@@ -87,7 +87,7 @@ public class HashFactory {
     @Override
     public int hash(byte[] bytes) {
       long hashValue = 31;
-      for (byte byteVal : bytes) {
+      for (long byteVal : bytes) {
         hashValue *= seedA * (byteVal >> 4);
         hashValue += seedB * byteVal + seedC;
       }

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/common/IntTuple.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/common/IntTuple.java?rev=1032979&r1=1032978&r2=1032979&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/common/IntTuple.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/common/IntTuple.java Tue Nov  9 13:19:26 2010
@@ -155,8 +155,13 @@ public final class IntTuple implements W
     int otherLength = otherTuple.length();
     int min = Math.min(thisLength, otherLength);
     for (int i = 0; i < min; i++) {
-      if (this.tuple.get(i) == otherTuple.at(i)) return 0;
-      return this.tuple.get(i) - otherTuple.at(i);
+      int a = this.tuple.get(i);
+      int b = otherTuple.at(i);
+      if (a < b) {
+        return -1;
+      } else if (a > b) {
+        return 1;
+      }
     }
     if (thisLength < otherLength) {
       return -1;

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/df/builder/DefaultTreeBuilder.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/df/builder/DefaultTreeBuilder.java?rev=1032979&r1=1032978&r2=1032979&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/df/builder/DefaultTreeBuilder.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/df/builder/DefaultTreeBuilder.java Tue Nov  9 13:19:26 2010
@@ -185,8 +185,7 @@ public class DefaultTreeBuilder implemen
     boolean[] cloned = new boolean[selected.length];
 
     for (int i = 0; i < selected.length; i++) {
-      if (dataset.isNumerical(i)) cloned[i] = false;
-      else cloned[i] = selected[i];
+      cloned[i] = !dataset.isNumerical(i) && selected[i];
     }
 
     return cloned;

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/df/data/Data.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/df/data/Data.java?rev=1032979&r1=1032978&r2=1032979&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/df/data/Data.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/df/data/Data.java Tue Nov  9 13:19:26 2010
@@ -19,10 +19,10 @@ package org.apache.mahout.df.data;
 
 import java.io.IOException;
 import java.util.ArrayList;
+import java.util.Collection;
 import java.util.HashSet;
 import java.util.List;
 import java.util.Random;
-import java.util.Set;
 
 import org.apache.hadoop.fs.FSDataInputStream;
 import org.apache.hadoop.fs.FileSystem;
@@ -225,7 +225,7 @@ public class Data implements Cloneable {
    * @param attr
    */
   public double[] values(int attr) {
-    Set<Double> result = new HashSet<Double>();
+    Collection<Double> result = new HashSet<Double>();
     
     for (Instance instance : instances) {
       result.add(instance.get(attr));

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/df/mapreduce/Classifier.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/df/mapreduce/Classifier.java?rev=1032979&r1=1032978&r2=1032979&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/df/mapreduce/Classifier.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/df/mapreduce/Classifier.java Tue Nov  9 13:19:26 2010
@@ -17,6 +17,7 @@
 
 package org.apache.mahout.df.mapreduce;
 
+import org.apache.hadoop.io.Writable;
 import org.apache.mahout.common.HadoopUtil;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -152,7 +153,7 @@ public class Classifier {
    * Will compute the ConfusionMatrix if necessary.
    * @param job
    */
-  private void parseOutput(Job job) throws IOException {
+  private void parseOutput(JobContext job) throws IOException {
     Configuration conf = job.getConfiguration();
     FileSystem fs = mappersOutputPath.getFileSystem(conf);
 
@@ -161,7 +162,7 @@ public class Classifier {
 
     // read all the output
     LongWritable key = new LongWritable();
-    Text value = new Text();
+    Writable value = new Text();
     for (Path path : outfiles) {
       SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf);
 

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/df/tools/Describe.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/df/tools/Describe.java?rev=1032979&r1=1032978&r2=1032979&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/df/tools/Describe.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/df/tools/Describe.java Tue Nov  9 13:19:26 2010
@@ -99,8 +99,8 @@ public final class Describe {
     }
   }
   
-  private static void runTool(String dataPath, List<String> description, String filePath) throws DescriptorException,
-                                                                                         IOException {
+  private static void runTool(String dataPath, Iterable<String> description, String filePath)
+    throws DescriptorException, IOException {
     log.info("Generating the descriptor...");
     String descriptor = DescriptorUtils.generateDescriptor(description);
     
@@ -113,8 +113,7 @@ public final class Describe {
     DFUtils.storeWritable(new Configuration(), fPath, dataset);
   }
   
-  private static Dataset generateDataset(String descriptor, String dataPath) throws IOException,
-                                                                            DescriptorException {
+  private static Dataset generateDataset(String descriptor, String dataPath) throws IOException, DescriptorException {
     Path path = new Path(dataPath);
     FileSystem fs = path.getFileSystem(new Configuration());
     

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/df/tools/FrequenciesJob.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/df/tools/FrequenciesJob.java?rev=1032979&r1=1032978&r2=1032979&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/df/tools/FrequenciesJob.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/df/tools/FrequenciesJob.java Tue Nov  9 13:19:26 2010
@@ -141,7 +141,7 @@ public class FrequenciesJob {
     Frequencies[] values = new Frequencies[numMaps];
     
     // read all the outputs
-    LongWritable key = new LongWritable();
+    Writable key = new LongWritable();
     Frequencies value = new Frequencies();
     
     int index = 0;

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/driver/MahoutDriver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/driver/MahoutDriver.java?rev=1032979&r1=1032978&r2=1032979&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/driver/MahoutDriver.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/driver/MahoutDriver.java Tue Nov  9 13:19:26 2010
@@ -100,11 +100,9 @@ public final class MahoutDriver {
 
     try {
       mainClasses.load(propsStream);
-    } catch (Throwable e) {
+    } catch (IOException e) {
       //try getting the default one
-      propsStream = Thread.currentThread()
-                                    .getContextClassLoader()
-                                    .getResourceAsStream("driver.classes.default.props");
+      propsStream = Thread.currentThread().getContextClassLoader().getResourceAsStream("driver.classes.default.props");
       mainClasses.load(propsStream);
     }
 

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/FPGrowthDriver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/FPGrowthDriver.java?rev=1032979&r1=1032978&r2=1032979&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/FPGrowthDriver.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/FPGrowthDriver.java Tue Nov  9 13:19:26 2010
@@ -20,9 +20,9 @@ package org.apache.mahout.fpm.pfpgrowth;
 import java.io.File;
 import java.io.IOException;
 import java.nio.charset.Charset;
+import java.util.Collection;
 import java.util.HashSet;
 import java.util.List;
-import java.util.Set;
 
 import org.apache.commons.cli2.CommandLine;
 import org.apache.commons.cli2.Group;
@@ -189,7 +189,7 @@ public final class FPGrowthDriver {
     SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf, path, Text.class, TopKStringPatterns.class);
     
     FPGrowth<String> fp = new FPGrowth<String>();
-    Set<String> features = new HashSet<String>();
+    Collection<String> features = new HashSet<String>();
     
     fp.generateTopKFrequentPatterns(
         new StringRecordIterator(new FileLineIterable(new File(input), encoding, false), pattern),
@@ -205,8 +205,7 @@ public final class FPGrowthDriver {
     
     List<Pair<String,TopKStringPatterns>> frequentPatterns = FPGrowth.readFrequentPattern(fs, conf, path);
     for (Pair<String,TopKStringPatterns> entry : frequentPatterns) {
-      log.info("Dumping Patterns for Feature: {} \n{}", entry.getFirst(), entry.getSecond()
-          .toString());
+      log.info("Dumping Patterns for Feature: {} \n{}", entry.getFirst(), entry.getSecond());
     }
   }
 }

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/ParallelFPGrowthMapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/ParallelFPGrowthMapper.java?rev=1032979&r1=1032978&r2=1032979&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/ParallelFPGrowthMapper.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/ParallelFPGrowthMapper.java Tue Nov  9 13:19:26 2010
@@ -18,11 +18,11 @@
 package org.apache.mahout.fpm.pfpgrowth;
 
 import java.io.IOException;
+import java.util.Collection;
 import java.util.HashSet;
 import java.util.Iterator;
 import java.util.List;
 import java.util.Map.Entry;
-import java.util.Set;
 
 import org.apache.hadoop.io.LongWritable;
 import org.apache.hadoop.mapreduce.Mapper;
@@ -49,7 +49,7 @@ public class ParallelFPGrowthMapper exte
       Pair<List<Integer>,Long> pattern = it.next();
       Integer[] prunedItems = pattern.getFirst().toArray(new Integer[pattern.getFirst().size()]);
       
-      Set<Long> groups = new HashSet<Long>();
+      Collection<Long> groups = new HashSet<Long>();
       for (int j = prunedItems.length - 1; j >= 0; j--) { // generate group
         // dependent
         // shards

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/TransactionSortingMapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/TransactionSortingMapper.java?rev=1032979&r1=1032978&r2=1032979&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/TransactionSortingMapper.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/TransactionSortingMapper.java Tue Nov  9 13:19:26 2010
@@ -23,7 +23,6 @@ import java.util.Arrays;
 import java.util.Collections;
 import java.util.HashSet;
 import java.util.List;
-import java.util.Set;
 import java.util.regex.Pattern;
 
 import org.apache.hadoop.io.LongWritable;
@@ -49,7 +48,7 @@ public class TransactionSortingMapper ex
                                                                       InterruptedException {
     
     String[] items = splitter.split(input.toString());
-    Set<String> uniqueItems = new HashSet<String>(Arrays.asList(items));
+    Iterable<String> uniqueItems = new HashSet<String>(Arrays.asList(items));
     
     List<Integer> itemSet = new ArrayList<Integer>();
     for (String item : uniqueItems) { // remove items not in the fList

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/TransactionTree.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/TransactionTree.java?rev=1032979&r1=1032978&r2=1032979&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/TransactionTree.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/TransactionTree.java Tue Nov  9 13:19:26 2010
@@ -182,7 +182,7 @@ public final class TransactionTree imple
     return false;
   }
   
-  public int addPattern(List<Integer> myList, long addCount) {
+  public int addPattern(Iterable<Integer> myList, long addCount) {
     int temp = ROOTNODEID;
     int ret = 0;
     boolean addCountMode = true;