You are viewing a plain-text version of this content. The canonical link to the original message was not preserved in this text rendering.
Posted to commits@mahout.apache.org by sr...@apache.org on 2010/11/09 14:19:28 UTC
svn commit: r1032979 [1/2] - in
/mahout/trunk/core/src/main/java/org/apache/mahout: cf/taste/impl/common/
cf/taste/impl/model/ classifier/ classifier/bayes/
classifier/bayes/algorithm/ classifier/bayes/interfaces/
classifier/naivebayes/ classifier/naiv...
Author: srowen
Date: Tue Nov 9 13:19:26 2010
New Revision: 1032979
URL: http://svn.apache.org/viewvc?rev=1032979&view=rev
Log:
Style changes, mostly weakening types
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/Cache.java
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FastByIDMap.java
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FastMap.java
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/GenericDataModel.java
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/ResultAnalyzer.java
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/TestClassifier.java
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/algorithm/BayesAlgorithm.java
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/algorithm/CBayesAlgorithm.java
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/interfaces/Algorithm.java
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/AbstractNaiveBayesClassifier.java
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/ComplementaryNaiveBayesClassifier.java
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/NaiveBayesModel.java
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/StandardNaiveBayesClassifier.java
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/trainer/NaiveBayesInstanceMapper.java
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/trainer/NaiveBayesThetaComplementaryMapper.java
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/trainer/NaiveBayesThetaMapper.java
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/trainer/NaiveBayesTrainer.java
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/trainer/NaiveBayesWeightsMapper.java
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sgd/ModelDissector.java
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sgd/RankingGradient.java
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/DistanceMeasureCluster.java
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/VectorModelClassifier.java
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/ClusterMapper.java
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletClusterMapper.java
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletClusterer.java
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/DistanceMeasureClusterDistribution.java
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansClusterMapper.java
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansClusterer.java
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansMapper.java
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansReducer.java
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansUtil.java
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansClusterMapper.java
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansClusterer.java
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansDriver.java
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansMapper.java
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansReducer.java
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansUtil.java
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyClusterMapper.java
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyMapper.java
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyReducer.java
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/minhash/HashFactory.java
mahout/trunk/core/src/main/java/org/apache/mahout/common/IntTuple.java
mahout/trunk/core/src/main/java/org/apache/mahout/df/builder/DefaultTreeBuilder.java
mahout/trunk/core/src/main/java/org/apache/mahout/df/data/Data.java
mahout/trunk/core/src/main/java/org/apache/mahout/df/mapreduce/Classifier.java
mahout/trunk/core/src/main/java/org/apache/mahout/df/tools/Describe.java
mahout/trunk/core/src/main/java/org/apache/mahout/df/tools/FrequenciesJob.java
mahout/trunk/core/src/main/java/org/apache/mahout/driver/MahoutDriver.java
mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/FPGrowthDriver.java
mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/ParallelFPGrowthMapper.java
mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/TransactionSortingMapper.java
mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/TransactionTree.java
mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/FPGrowth.java
mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/FPTree.java
mahout/trunk/core/src/main/java/org/apache/mahout/ga/watchmaker/MahoutEvaluator.java
mahout/trunk/core/src/main/java/org/apache/mahout/ga/watchmaker/OutputUtils.java
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/TimesSquaredJob.java
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/decomposer/DistributedLanczosSolver.java
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/decomposer/EigenVerificationJob.java
mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/SparseVectorsFromSequenceFiles.java
mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/TF.java
mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/collocations/llr/CollocDriver.java
mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/AdaptiveWordValueEncoder.java
mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/CachingContinuousValueEncoder.java
mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/CachingStaticWordValueEncoder.java
mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/CachingValueEncoder.java
mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/Dictionary.java
mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/TextValueEncoder.java
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/Cache.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/Cache.java?rev=1032979&r1=1032978&r2=1032979&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/Cache.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/Cache.java Tue Nov 9 13:19:26 2010
@@ -122,8 +122,11 @@ public final class Cache<K,V> implements
private V getAndCacheValue(K key) throws TasteException {
V value = retriever.get(key);
+ if (value == null) {
+ value = (V) NULL;
+ }
synchronized (cache) {
- cache.put(key, value == null ? (V) NULL : value);
+ cache.put(key, value);
}
return value;
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FastByIDMap.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FastByIDMap.java?rev=1032979&r1=1032978&r2=1032979&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FastByIDMap.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FastByIDMap.java Tue Nov 9 13:19:26 2010
@@ -47,7 +47,7 @@ public final class FastByIDMap<V> implem
private V[] values;
private int numEntries;
private int numSlotsUsed;
- private int maxSize;
+ private final int maxSize;
private BitSet recentlyAccessed;
private final boolean countingAccesses;
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FastMap.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FastMap.java?rev=1032979&r1=1032978&r2=1032979&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FastMap.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FastMap.java Tue Nov 9 13:19:26 2010
@@ -63,7 +63,7 @@ public final class FastMap<K,V> implemen
private V[] values;
private int numEntries;
private int numSlotsUsed;
- private int maxSize;
+ private final int maxSize;
private BitSet recentlyAccessed;
private final boolean countingAccesses;
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/GenericDataModel.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/GenericDataModel.java?rev=1032979&r1=1032978&r2=1032979&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/GenericDataModel.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/GenericDataModel.java Tue Nov 9 13:19:26 2010
@@ -92,7 +92,7 @@ public final class GenericDataModel exte
for (Preference preference : prefs) {
long itemID = preference.getItemID();
itemIDSet.add(itemID);
- List<Preference> prefsForItem = (List<Preference>) prefsForItems.get(itemID);
+ Collection<Preference> prefsForItem = prefsForItems.get(itemID);
if (prefsForItem == null) {
prefsForItem = new ArrayList<Preference>(2);
prefsForItems.put(itemID, prefsForItem);
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/ResultAnalyzer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/ResultAnalyzer.java?rev=1032979&r1=1032978&r2=1032979&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/ResultAnalyzer.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/ResultAnalyzer.java Tue Nov 9 13:19:26 2010
@@ -18,6 +18,7 @@
package org.apache.mahout.classifier;
import java.text.DecimalFormat;
+import java.text.NumberFormat;
import java.util.Collection;
import org.apache.commons.lang.StringUtils;
@@ -84,7 +85,7 @@ public class ResultAnalyzer implements S
int totalClassified = correctlyClassified + incorrectlyClassified;
double percentageCorrect = (double) 100 * correctlyClassified / totalClassified;
double percentageIncorrect = (double) 100 * incorrectlyClassified / totalClassified;
- DecimalFormat decimalFormatter = new DecimalFormat("0.####");
+ NumberFormat decimalFormatter = new DecimalFormat("0.####");
returnString.append(StringUtils.rightPad("Correctly Classified Instances", 40)).append(": ").append(
StringUtils.leftPad(Integer.toString(correctlyClassified), 10)).append('\t').append(
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/TestClassifier.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/TestClassifier.java?rev=1032979&r1=1032978&r2=1032979&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/TestClassifier.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/TestClassifier.java Tue Nov 9 13:19:26 2010
@@ -130,12 +130,6 @@ public final class TestClassifier {
BayesParameters params = new BayesParameters();
// Setting all default values
int gramSize = 1;
- String classifierType = "bayes";
- String dataSource = "hdfs";
- String defaultCat = "unknown";
- String encoding = "UTF-8";
- String alphaI = "1.0";
- String classificationMethod = "sequential";
String modelBasePath = (String) cmdLine.getValue(pathOpt);
@@ -143,23 +137,28 @@ public final class TestClassifier {
gramSize = Integer.parseInt((String) cmdLine.getValue(gramSizeOpt));
}
-
+
+ String classifierType = "bayes";
if (cmdLine.hasOption(classifierType)) {
classifierType = (String) cmdLine.getValue(typeOpt);
}
-
+
+ String dataSource = "hdfs";
if (cmdLine.hasOption(dataSource)) {
dataSource = (String) cmdLine.getValue(dataSource);
}
-
+
+ String defaultCat = "unknown";
if (cmdLine.hasOption(defaultCatOpt)) {
defaultCat = (String) cmdLine.getValue(defaultCatOpt);
}
-
+
+ String encoding = "UTF-8";
if (cmdLine.hasOption(encodingOpt)) {
encoding = (String) cmdLine.getValue(encodingOpt);
}
-
+
+ String alphaI = "1.0";
if (cmdLine.hasOption(alphaOpt)) {
alphaI = (String) cmdLine.getValue(alphaOpt);
}
@@ -167,7 +166,8 @@ public final class TestClassifier {
boolean verbose = cmdLine.hasOption(verboseOutputOpt);
String testDirPath = (String) cmdLine.getValue(dirOpt);
-
+
+ String classificationMethod = "sequential";
if (cmdLine.hasOption(methodOpt)) {
classificationMethod = (String) cmdLine.getValue(methodOpt);
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/algorithm/BayesAlgorithm.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/algorithm/BayesAlgorithm.java?rev=1032979&r1=1032978&r2=1032979&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/algorithm/BayesAlgorithm.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/algorithm/BayesAlgorithm.java Tue Nov 9 13:19:26 2010
@@ -108,7 +108,7 @@ public class BayesAlgorithm implements A
@Override
public double documentWeight(final Datastore datastore,
final String label,
- String[] document) throws InvalidDatastoreException {
+ String[] document) {
OpenObjectIntHashMap<String> wordList = new OpenObjectIntHashMap<String>(document.length / 2);
for (String word : document) {
if (wordList.containsKey(word)) {
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/algorithm/CBayesAlgorithm.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/algorithm/CBayesAlgorithm.java?rev=1032979&r1=1032978&r2=1032979&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/algorithm/CBayesAlgorithm.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/algorithm/CBayesAlgorithm.java Tue Nov 9 13:19:26 2010
@@ -117,7 +117,7 @@ public class CBayesAlgorithm implements
@Override
public double documentWeight(final Datastore datastore,
final String label,
- String[] document) throws InvalidDatastoreException {
+ String[] document) {
OpenObjectIntHashMap<String> wordList = new OpenObjectIntHashMap<String>(document.length / 2);
for (String word : document) {
if (wordList.containsKey(word)) {
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/interfaces/Algorithm.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/interfaces/Algorithm.java?rev=1032979&r1=1032978&r2=1032979&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/interfaces/Algorithm.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/interfaces/Algorithm.java Tue Nov 9 13:19:26 2010
@@ -96,10 +96,9 @@ public interface Algorithm {
* @param document
* The document
* @return The probability
- * @throws InvalidDatastoreException
* @see Algorithm#featureWeight(Datastore, String, String)
*/
- double documentWeight(Datastore datastore, String label, String[] document) throws InvalidDatastoreException;
+ double documentWeight(Datastore datastore, String label, String[] document);
/**
* Returns the labels in the given Model
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/AbstractNaiveBayesClassifier.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/AbstractNaiveBayesClassifier.java?rev=1032979&r1=1032978&r2=1032979&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/AbstractNaiveBayesClassifier.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/AbstractNaiveBayesClassifier.java Tue Nov 9 13:19:26 2010
@@ -28,11 +28,15 @@ import org.apache.mahout.math.Vector.Ele
*
*/
public abstract class AbstractNaiveBayesClassifier extends AbstractVectorClassifier {
- protected NaiveBayesModel model;
+ private final NaiveBayesModel model;
- public AbstractNaiveBayesClassifier(NaiveBayesModel model) {
+ protected AbstractNaiveBayesClassifier(NaiveBayesModel model) {
this.model = model;
}
+
+ protected NaiveBayesModel getModel() {
+ return model;
+ }
public abstract double getScoreForLabelFeature(int label, int feature);
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/ComplementaryNaiveBayesClassifier.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/ComplementaryNaiveBayesClassifier.java?rev=1032979&r1=1032978&r2=1032979&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/ComplementaryNaiveBayesClassifier.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/ComplementaryNaiveBayesClassifier.java Tue Nov 9 13:19:26 2010
@@ -30,6 +30,7 @@ public class ComplementaryNaiveBayesClas
@Override
public double getScoreForLabelFeature(int label, int feature) {
+ NaiveBayesModel model = getModel();
double result = model.getWeightMatrix().get(label, feature);
double vocabCount = model.getVocabCount();
double featureSum = model.getFeatureSum().get(feature);
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/NaiveBayesModel.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/NaiveBayesModel.java?rev=1032979&r1=1032978&r2=1032979&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/NaiveBayesModel.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/NaiveBayesModel.java Tue Nov 9 13:19:26 2010
@@ -26,6 +26,7 @@ import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.Writable;
import org.apache.mahout.classifier.naivebayes.trainer.NaiveBayesTrainer;
import org.apache.mahout.math.JsonMatrixAdapter;
import org.apache.mahout.math.JsonVectorAdapter;
@@ -155,7 +156,7 @@ public class NaiveBayesModel implements
FileSystem fs = sumVectorPath.getFileSystem(conf);
SequenceFile.Reader reader = new SequenceFile.Reader(fs, sumVectorPath, conf);
- Text key = new Text();
+ Writable key = new Text();
VectorWritable value = new VectorWritable();
int featureCount = 0;
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/StandardNaiveBayesClassifier.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/StandardNaiveBayesClassifier.java?rev=1032979&r1=1032978&r2=1032979&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/StandardNaiveBayesClassifier.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/StandardNaiveBayesClassifier.java Tue Nov 9 13:19:26 2010
@@ -30,6 +30,7 @@ public class StandardNaiveBayesClassifie
@Override
public double getScoreForLabelFeature(int label, int feature) {
+ NaiveBayesModel model = getModel();
double result = model.getWeightMatrix().get(label, feature);
double vocabCount = model.getVocabCount();
double sumLabelWeight = model.getLabelSum().get(label);
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/trainer/NaiveBayesInstanceMapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/trainer/NaiveBayesInstanceMapper.java?rev=1032979&r1=1032978&r2=1032979&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/trainer/NaiveBayesInstanceMapper.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/trainer/NaiveBayesInstanceMapper.java Tue Nov 9 13:19:26 2010
@@ -34,7 +34,7 @@ import org.apache.mahout.math.map.OpenOb
public class NaiveBayesInstanceMapper extends Mapper<Text, VectorWritable, IntWritable, VectorWritable> {
- private OpenObjectIntHashMap<String> labelMap = new OpenObjectIntHashMap<String>();
+ private final OpenObjectIntHashMap<String> labelMap = new OpenObjectIntHashMap<String>();
@Override
protected void map(Text key, VectorWritable value, Context context)
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/trainer/NaiveBayesThetaComplementaryMapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/trainer/NaiveBayesThetaComplementaryMapper.java?rev=1032979&r1=1032978&r2=1032979&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/trainer/NaiveBayesThetaComplementaryMapper.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/trainer/NaiveBayesThetaComplementaryMapper.java Tue Nov 9 13:19:26 2010
@@ -28,6 +28,7 @@ import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.mahout.classifier.naivebayes.BayesConstants;
import org.apache.mahout.math.Vector;
@@ -37,7 +38,7 @@ import org.apache.mahout.math.map.OpenOb
public class NaiveBayesThetaComplementaryMapper extends Mapper<IntWritable, VectorWritable, Text, VectorWritable> {
- private OpenObjectIntHashMap<String> labelMap = new OpenObjectIntHashMap<String>();
+ private final OpenObjectIntHashMap<String> labelMap = new OpenObjectIntHashMap<String>();
private Vector featureSum;
private Vector labelSum;
private Vector perLabelThetaNormalizer;
@@ -74,7 +75,7 @@ public class NaiveBayesThetaComplementar
Path weightFile = new Path(localFiles[0].getPath());
FileSystem fs = weightFile.getFileSystem(conf);
SequenceFile.Reader reader = new SequenceFile.Reader(fs, weightFile, conf);
- Text key = new Text();
+ Writable key = new Text();
VectorWritable value = new VectorWritable();
while (reader.next(key, value)) {
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/trainer/NaiveBayesThetaMapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/trainer/NaiveBayesThetaMapper.java?rev=1032979&r1=1032978&r2=1032979&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/trainer/NaiveBayesThetaMapper.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/trainer/NaiveBayesThetaMapper.java Tue Nov 9 13:19:26 2010
@@ -27,6 +27,7 @@ import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.mahout.classifier.naivebayes.BayesConstants;
import org.apache.mahout.math.Vector;
@@ -35,7 +36,7 @@ import org.apache.mahout.math.map.OpenOb
public class NaiveBayesThetaMapper extends Mapper<IntWritable, VectorWritable, Text, VectorWritable> {
- private OpenObjectIntHashMap<String> labelMap = new OpenObjectIntHashMap<String>();
+ private final OpenObjectIntHashMap<String> labelMap = new OpenObjectIntHashMap<String>();
private Vector featureSum;
private Vector labelSum;
private Vector perLabelThetaNormalizer;
@@ -64,7 +65,7 @@ public class NaiveBayesThetaMapper exten
Path weightFile = new Path(localFiles[0].getPath());
FileSystem fs = weightFile.getFileSystem(conf);
SequenceFile.Reader reader = new SequenceFile.Reader(fs, weightFile, conf);
- Text key = new Text();
+ Writable key = new Text();
VectorWritable value = new VectorWritable();
while (reader.next(key, value)) {
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/trainer/NaiveBayesTrainer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/trainer/NaiveBayesTrainer.java?rev=1032979&r1=1032978&r2=1032979&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/trainer/NaiveBayesTrainer.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/trainer/NaiveBayesTrainer.java Tue Nov 9 13:19:26 2010
@@ -19,7 +19,6 @@ package org.apache.mahout.classifier.nai
import java.io.IOException;
import java.net.URI;
-import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.filecache.DistributedCache;
@@ -50,14 +49,17 @@ public final class NaiveBayesTrainer {
public static final String LABEL_MAP = "labelMap";
public static final String ALPHA_I = "alphaI";
+ private NaiveBayesTrainer() {
+ }
+
public static void trainNaiveBayes(Path input,
Configuration conf,
- List<String> inputLabels,
+ Iterable<String> inputLabels,
Path output,
int numReducers,
float alphaI,
boolean trainComplementary)
- throws IOException, InterruptedException, ClassNotFoundException {
+ throws IOException, InterruptedException, ClassNotFoundException {
conf.setFloat(ALPHA_I, alphaI);
Path labelMapPath = createLabelMapFile(inputLabels, conf, new Path(output, LABEL_MAP));
Path classVectorPath = new Path(output, CLASS_VECTORS);
@@ -72,7 +74,9 @@ public final class NaiveBayesTrainer {
}
}
- private static void runNaiveBayesByLabelSummer(Path input, Configuration conf, Path labelMapPath,
+ private static void runNaiveBayesByLabelSummer(Path input,
+ Configuration conf,
+ Path labelMapPath,
Path output, int numReducers)
throws IOException, InterruptedException, ClassNotFoundException {
@@ -99,8 +103,11 @@ public final class NaiveBayesTrainer {
job.waitForCompletion(true);
}
- private static void runNaiveBayesWeightSummer(Path input, Configuration conf,
- Path labelMapPath, Path output, int numReducers)
+ private static void runNaiveBayesWeightSummer(Path input,
+ Configuration conf,
+ Path labelMapPath,
+ Path output,
+ int numReducers)
throws IOException, InterruptedException, ClassNotFoundException {
// this conf parameter needs to be set enable serialisation of conf values
@@ -124,9 +131,12 @@ public final class NaiveBayesTrainer {
job.waitForCompletion(true);
}
- private static void runNaiveBayesThetaSummer(Path input, Configuration conf,
- Path weightFilePath, Path output, int numReducers)
- throws IOException, InterruptedException, ClassNotFoundException {
+ private static void runNaiveBayesThetaSummer(Path input,
+ Configuration conf,
+ Path weightFilePath,
+ Path output,
+ int numReducers)
+ throws IOException, InterruptedException, ClassNotFoundException {
// this conf parameter needs to be set enable serialisation of conf values
conf.set("io.serializations", "org.apache.hadoop.io.serializer.JavaSerialization,"
@@ -150,9 +160,12 @@ public final class NaiveBayesTrainer {
job.waitForCompletion(true);
}
- private static void runNaiveBayesThetaComplementarySummer(Path input, Configuration conf,
- Path weightFilePath, Path output, int numReducers)
- throws IOException, InterruptedException, ClassNotFoundException {
+ private static void runNaiveBayesThetaComplementarySummer(Path input,
+ Configuration conf,
+ Path weightFilePath,
+ Path output,
+ int numReducers)
+ throws IOException, InterruptedException, ClassNotFoundException {
// this conf parameter needs to be set enable serialisation of conf values
conf.set("io.serializations", "org.apache.hadoop.io.serializer.JavaSerialization,"
@@ -180,14 +193,10 @@ public final class NaiveBayesTrainer {
/**
* Write the list of labels into a map file
- *
- * @param wordCountPath
- * @param dictionaryPathBase
- * @throws IOException
*/
- public static Path createLabelMapFile(List<String> labels,
- Configuration conf,
- Path labelMapPathBase) throws IOException {
+ public static Path createLabelMapFile(Iterable<String> labels,
+ Configuration conf,
+ Path labelMapPathBase) throws IOException {
FileSystem fs = FileSystem.get(labelMapPathBase.toUri(), conf);
Path labelMapPath = new Path(labelMapPathBase, LABEL_MAP);
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/trainer/NaiveBayesWeightsMapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/trainer/NaiveBayesWeightsMapper.java?rev=1032979&r1=1032978&r2=1032979&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/trainer/NaiveBayesWeightsMapper.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/trainer/NaiveBayesWeightsMapper.java Tue Nov 9 13:19:26 2010
@@ -37,9 +37,9 @@ import org.apache.mahout.math.map.OpenOb
public class NaiveBayesWeightsMapper extends Mapper<IntWritable, VectorWritable, Text, VectorWritable> {
- private OpenObjectIntHashMap<String> labelMap = new OpenObjectIntHashMap<String>();
- Vector featureSum;
- Vector labelSum;
+ private final OpenObjectIntHashMap<String> labelMap = new OpenObjectIntHashMap<String>();
+ private Vector featureSum;
+ private Vector labelSum;
@Override
protected void map(IntWritable key, VectorWritable value, Context context)
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sgd/ModelDissector.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sgd/ModelDissector.java?rev=1032979&r1=1032978&r2=1032979&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sgd/ModelDissector.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sgd/ModelDissector.java Tue Nov 9 13:19:26 2010
@@ -22,10 +22,7 @@ import com.google.common.collect.Maps;
import com.google.common.collect.Ordering;
import org.apache.mahout.classifier.AbstractVectorClassifier;
import org.apache.mahout.math.Vector;
-import org.apache.mahout.math.function.BinaryFunction;
-import org.apache.mahout.math.function.Functions;
-import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;
@@ -119,10 +116,10 @@ public class ModelDissector {
}
private static class Category implements Comparable<Category> {
- int index;
- double weight;
+ private final int index;
+ private final double weight;
- public Category(int index, double weight) {
+ private Category(int index, double weight) {
this.index = index;
this.weight = weight;
}
@@ -130,10 +127,15 @@ public class ModelDissector {
@Override
public int compareTo(Category o) {
int r = Double.compare(Math.abs(weight), Math.abs(o.weight));
- if (r != 0) {
- return r;
+ if (r == 0) {
+ if (index < o.index) {
+ return -1;
+ } else if (index > o.index) {
+ return 1;
+ }
+ return 0;
} else {
- return index - o.index;
+ return r;
}
}
}
@@ -142,7 +144,7 @@ public class ModelDissector {
private final String feature;
private final double value;
private final int maxIndex;
- private List<Category> categories;
+ private final List<Category> categories;
public Weight(String feature, Vector weights) {
this(feature, weights, 3);
@@ -151,7 +153,7 @@ public class ModelDissector {
public Weight(String feature, Vector weights, int n) {
this.feature = feature;
// pick out the weight with the largest abs value, but don't forget the sign
- PriorityQueue<Category> biggest = new PriorityQueue<Category>(n + 1, Ordering.natural().reverse());
+ Queue<Category> biggest = new PriorityQueue<Category>(n + 1, Ordering.natural().reverse());
for (Vector.Element element : weights) {
biggest.add(new Category(element.index(), element.get()));
while (biggest.size() > n) {
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sgd/RankingGradient.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sgd/RankingGradient.java?rev=1032979&r1=1032978&r2=1032979&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sgd/RankingGradient.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sgd/RankingGradient.java Tue Nov 9 13:19:26 2010
@@ -17,7 +17,6 @@
package org.apache.mahout.classifier.sgd;
-import com.google.common.collect.Collections2;
import com.google.common.collect.Lists;
import org.apache.mahout.classifier.AbstractVectorClassifier;
import org.apache.mahout.math.Vector;
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/DistanceMeasureCluster.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/DistanceMeasureCluster.java?rev=1032979&r1=1032978&r2=1032979&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/DistanceMeasureCluster.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/DistanceMeasureCluster.java Tue Nov 9 13:19:26 2010
@@ -27,7 +27,7 @@ import org.apache.mahout.math.VectorWrit
public class DistanceMeasureCluster extends AbstractCluster {
- protected DistanceMeasure measure;
+ private DistanceMeasure measure;
public DistanceMeasureCluster(Vector point, int id, DistanceMeasure measure) {
super(point, id);
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/VectorModelClassifier.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/VectorModelClassifier.java?rev=1032979&r1=1032978&r2=1032979&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/VectorModelClassifier.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/VectorModelClassifier.java Tue Nov 9 13:19:26 2010
@@ -16,6 +16,7 @@
package org.apache.mahout.clustering;
import java.util.ArrayList;
+import java.util.Collection;
import java.util.List;
import org.apache.mahout.classifier.AbstractVectorClassifier;
@@ -43,7 +44,7 @@ public class VectorModelClassifier exten
public Vector classify(Vector instance) {
Vector pdfs = new DenseVector(models.size());
if (models.get(0) instanceof SoftCluster) {
- List<SoftCluster> clusters = new ArrayList<SoftCluster>();
+ Collection<SoftCluster> clusters = new ArrayList<SoftCluster>();
List<Double> distances = new ArrayList<Double>();
for (Model<VectorWritable> model : models) {
SoftCluster sc = (SoftCluster) model;
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/ClusterMapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/ClusterMapper.java?rev=1032979&r1=1032978&r2=1032979&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/ClusterMapper.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/ClusterMapper.java Tue Nov 9 13:19:26 2010
@@ -20,7 +20,6 @@ package org.apache.mahout.clustering.can
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
-import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
@@ -31,6 +30,7 @@ import org.apache.hadoop.fs.PathFilter;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.mahout.clustering.WeightedVectorWritable;
@@ -46,7 +46,7 @@ public class ClusterMapper extends Mappe
canopyClusterer.emitPointToClosestCanopy(point.get(), canopies, context);
}
- private final List<Canopy> canopies = new ArrayList<Canopy>();
+ private final Collection<Canopy> canopies = new ArrayList<Canopy>();
/**
* Configure the mapper by providing its canopies. Used by unit tests.
@@ -83,7 +83,7 @@ public class ClusterMapper extends Mappe
for (FileStatus file : files) {
SequenceFile.Reader reader = new SequenceFile.Reader(fs, file.getPath(), conf);
try {
- Text key = new Text();
+ Writable key = new Text();
Canopy value = new Canopy();
while (reader.next(key, value)) {
canopies.add(value);
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletClusterMapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletClusterMapper.java?rev=1032979&r1=1032978&r2=1032979&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletClusterMapper.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletClusterMapper.java Tue Nov 9 13:19:26 2010
@@ -28,6 +28,7 @@ import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.mahout.clustering.WeightedVectorWritable;
@@ -74,7 +75,7 @@ public class DirichletClusterMapper exte
for (FileStatus s : status) {
SequenceFile.Reader reader = new SequenceFile.Reader(fs, s.getPath(), conf);
try {
- Text key = new Text();
+ Writable key = new Text();
DirichletCluster cluster = new DirichletCluster();
while (reader.next(key, cluster)) {
clusters.add(cluster);
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletClusterer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletClusterer.java?rev=1032979&r1=1032978&r2=1032979&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletClusterer.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletClusterer.java Tue Nov 9 13:19:26 2010
@@ -257,8 +257,7 @@ public class DirichletClusterer {
}
// then pick one cluster by sampling a Multinomial distribution based upon them
// see: http://en.wikipedia.org/wiki/Multinomial_distribution
- int k = UncommonDistributions.rMultinom(pi);
- return k;
+ return UncommonDistributions.rMultinom(pi);
}
protected void updateModels(Cluster[] newModels) {
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/DistanceMeasureClusterDistribution.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/DistanceMeasureClusterDistribution.java?rev=1032979&r1=1032978&r2=1032979&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/DistanceMeasureClusterDistribution.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/DistanceMeasureClusterDistribution.java Tue Nov 9 13:19:26 2010
@@ -30,7 +30,7 @@ import org.apache.mahout.math.VectorWrit
*/
public class DistanceMeasureClusterDistribution extends AbstractVectorModelDistribution {
- DistanceMeasure measure;
+ private DistanceMeasure measure;
public DistanceMeasureClusterDistribution() {
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansClusterMapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansClusterMapper.java?rev=1032979&r1=1032978&r2=1032979&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansClusterMapper.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansClusterMapper.java Tue Nov 9 13:19:26 2010
@@ -19,6 +19,7 @@ package org.apache.mahout.clustering.fuz
import java.io.IOException;
import java.util.ArrayList;
+import java.util.Collection;
import java.util.List;
import org.apache.hadoop.conf.Configuration;
@@ -62,7 +63,7 @@ public class FuzzyKMeansClusterMapper ex
* @param clusters
* a List<Cluster>
*/
- void setup(List<SoftCluster> clusters, Configuration conf) {
+ void setup(Collection<SoftCluster> clusters, Configuration conf) {
this.clusters.clear();
this.clusters.addAll(clusters);
this.clusterer = new FuzzyKMeansClusterer(conf);
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansClusterer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansClusterer.java?rev=1032979&r1=1032978&r2=1032979&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansClusterer.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansClusterer.java Tue Nov 9 13:19:26 2010
@@ -234,7 +234,7 @@ public class FuzzyKMeansClusterer {
}
}
- public Vector computePi(List<SoftCluster> clusters, List<Double> clusterDistanceList) {
+ public Vector computePi(Collection<SoftCluster> clusters, List<Double> clusterDistanceList) {
Vector pi = new DenseVector(clusters.size());
for (int i = 0; i < clusters.size(); i++) {
double probWeight = computeProbWeight(clusterDistanceList.get(i), clusterDistanceList);
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansMapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansMapper.java?rev=1032979&r1=1032978&r2=1032979&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansMapper.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansMapper.java Tue Nov 9 13:19:26 2010
@@ -19,6 +19,7 @@ package org.apache.mahout.clustering.fuz
import java.io.IOException;
import java.util.ArrayList;
+import java.util.Collection;
import java.util.List;
import org.apache.hadoop.conf.Configuration;
@@ -67,7 +68,7 @@ public class FuzzyKMeansMapper extends M
* @param clusters
* a List<Cluster>
*/
- void config(List<SoftCluster> clusters) {
+ void config(Collection<SoftCluster> clusters) {
this.clusters.clear();
this.clusters.addAll(clusters);
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansReducer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansReducer.java?rev=1032979&r1=1032978&r2=1032979&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansReducer.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansReducer.java Tue Nov 9 13:19:26 2010
@@ -19,8 +19,8 @@ package org.apache.mahout.clustering.fuz
import java.io.IOException;
import java.util.ArrayList;
+import java.util.Collection;
import java.util.HashMap;
-import java.util.List;
import java.util.Map;
import org.apache.hadoop.conf.Configuration;
@@ -59,7 +59,7 @@ public class FuzzyKMeansReducer extends
Configuration conf = context.getConfiguration();
clusterer = new FuzzyKMeansClusterer(conf);
- List<SoftCluster> clusters = new ArrayList<SoftCluster>();
+ Collection<SoftCluster> clusters = new ArrayList<SoftCluster>();
String clusterPath = conf.get(FuzzyKMeansConfigKeys.CLUSTER_PATH_KEY);
if ((clusterPath != null) && (clusterPath.length() > 0)) {
FuzzyKMeansUtil.configureWithClusterInfo(new Path(clusterPath), clusters);
@@ -71,7 +71,7 @@ public class FuzzyKMeansReducer extends
}
}
- private void setClusterMap(List<SoftCluster> clusters) {
+ private void setClusterMap(Collection<SoftCluster> clusters) {
clusterMap.clear();
for (SoftCluster cluster : clusters) {
clusterMap.put(cluster.getIdentifier(), cluster);
@@ -79,7 +79,7 @@ public class FuzzyKMeansReducer extends
clusters.clear();
}
- public void setup(List<SoftCluster> clusters, Configuration conf) {
+ public void setup(Collection<SoftCluster> clusters, Configuration conf) {
setClusterMap(clusters);
clusterer = new FuzzyKMeansClusterer(conf);
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansUtil.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansUtil.java?rev=1032979&r1=1032978&r2=1032979&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansUtil.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansUtil.java Tue Nov 9 13:19:26 2010
@@ -19,7 +19,7 @@ package org.apache.mahout.clustering.fuz
import java.io.IOException;
import java.util.ArrayList;
-import java.util.List;
+import java.util.Collection;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
@@ -41,11 +41,11 @@ final class FuzzyKMeansUtil {
}
/** Configure the mapper with the cluster info */
- public static void configureWithClusterInfo(Path clusterPathStr, List<SoftCluster> clusters) {
+ public static void configureWithClusterInfo(Path clusterPathStr, Collection<SoftCluster> clusters) {
// Get the path location where the cluster Info is stored
Configuration job = new Configuration();
Path clusterPath = new Path(clusterPathStr, "*");
- List<Path> result = new ArrayList<Path>();
+ Collection<Path> result = new ArrayList<Path>();
// filter out the files
PathFilter clusterFileFilter = new PathFilter() {
@Override
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansClusterMapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansClusterMapper.java?rev=1032979&r1=1032978&r2=1032979&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansClusterMapper.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansClusterMapper.java Tue Nov 9 13:19:26 2010
@@ -19,7 +19,7 @@ package org.apache.mahout.clustering.kme
import java.io.IOException;
import java.util.ArrayList;
-import java.util.List;
+import java.util.Collection;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
@@ -32,7 +32,7 @@ import org.apache.mahout.math.VectorWrit
public class KMeansClusterMapper extends Mapper<WritableComparable<?>,VectorWritable,IntWritable,WeightedVectorWritable> {
- private final List<Cluster> clusters = new ArrayList<Cluster>();
+ private final Collection<Cluster> clusters = new ArrayList<Cluster>();
private KMeansClusterer clusterer;
@Override
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansClusterer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansClusterer.java?rev=1032979&r1=1032978&r2=1032979&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansClusterer.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansClusterer.java Tue Nov 9 13:19:26 2010
@@ -156,7 +156,7 @@ public class KMeansClusterer {
* a List<Cluster> to test.
*/
protected void emitPointToNearestCluster(Vector point, Iterable<Cluster> clusters, Writer writer)
- throws IOException, InterruptedException {
+ throws IOException {
AbstractCluster nearestCluster = null;
double nearestDistance = Double.MAX_VALUE;
for (AbstractCluster cluster : clusters) {
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansDriver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansDriver.java?rev=1032979&r1=1032978&r2=1032979&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansDriver.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansDriver.java Tue Nov 9 13:19:26 2010
@@ -18,6 +18,7 @@ package org.apache.mahout.clustering.kme
import java.io.IOException;
import java.util.ArrayList;
+import java.util.Collection;
import java.util.List;
import org.apache.hadoop.conf.Configuration;
@@ -62,7 +63,8 @@ public class KMeansDriver extends Abstra
addOption(DefaultOptionCreator.distanceMeasureOption().create());
addOption(DefaultOptionCreator.clustersInOption()
.withDescription("The input centroids, as Vectors. Must be a SequenceFile of Writable, Cluster/Canopy. "
- + "If k is also specified, then a random set of vectors will be selected" + " and written out to this path first")
+ + "If k is also specified, then a random set of vectors will be selected"
+ + " and written out to this path first")
.create());
addOption(DefaultOptionCreator.numClustersOption()
.withDescription("The k in k-Means. If specified, then a random selection of k Vectors will be chosen"
@@ -97,7 +99,8 @@ public class KMeansDriver extends Abstra
.parseInt(getOption(DefaultOptionCreator.NUM_CLUSTERS_OPTION)), measure);
}
boolean runClustering = hasOption(DefaultOptionCreator.CLUSTERING_OPTION);
- boolean runSequential = getOption(DefaultOptionCreator.METHOD_OPTION).equalsIgnoreCase(DefaultOptionCreator.SEQUENTIAL_METHOD);
+ boolean runSequential = getOption(DefaultOptionCreator.METHOD_OPTION).equalsIgnoreCase(
+ DefaultOptionCreator.SEQUENTIAL_METHOD);
run(getConf(), input, clusters, output, measure, convergenceDelta, maxIterations, runClustering, runSequential);
return 0;
}
@@ -129,21 +132,27 @@ public class KMeansDriver extends Abstra
double convergenceDelta,
int maxIterations,
boolean runClustering,
- boolean runSequential) throws IOException, InterruptedException, ClassNotFoundException,
- InstantiationException, IllegalAccessException {
+ boolean runSequential)
+ throws IOException, InterruptedException, ClassNotFoundException, InstantiationException, IllegalAccessException {
// iterate until the clusters converge
String delta = Double.toString(convergenceDelta);
if (log.isInfoEnabled()) {
log.info("Input: {} Clusters In: {} Out: {} Distance: {}", new Object[] { input, clustersIn, output,
measure.getClass().getName() });
- log.info("convergence: {} max Iterations: {} num Reduce Tasks: {} Input Vectors: {}", new Object[] { convergenceDelta,
- maxIterations, VectorWritable.class.getName() });
+ log.info("convergence: {} max Iterations: {} num Reduce Tasks: {} Input Vectors: {}",
+ new Object[] { convergenceDelta, maxIterations, VectorWritable.class.getName() });
}
Path clustersOut = buildClusters(conf, input, clustersIn, output, measure, maxIterations, delta, runSequential);
if (runClustering) {
log.info("Clustering data");
- clusterData(conf, input, clustersOut, new Path(output, AbstractCluster.CLUSTERED_POINTS_DIR), measure, delta, runSequential);
+ clusterData(conf,
+ input,
+ clustersOut,
+ new Path(output, AbstractCluster.CLUSTERED_POINTS_DIR),
+ measure,
+ delta,
+ runSequential);
}
}
@@ -174,9 +183,17 @@ public class KMeansDriver extends Abstra
double convergenceDelta,
int maxIterations,
boolean runClustering,
- boolean runSequential) throws IOException, InterruptedException, ClassNotFoundException,
- InstantiationException, IllegalAccessException {
- run(new Configuration(), input, clustersIn, output, measure, convergenceDelta, maxIterations, runClustering, runSequential);
+ boolean runSequential)
+ throws IOException, InterruptedException, ClassNotFoundException, InstantiationException, IllegalAccessException {
+ run(new Configuration(),
+ input,
+ clustersIn,
+ output,
+ measure,
+ convergenceDelta,
+ maxIterations,
+ runClustering,
+ runSequential);
}
/**
@@ -206,8 +223,8 @@ public class KMeansDriver extends Abstra
DistanceMeasure measure,
int maxIterations,
String delta,
- boolean runSequential) throws IOException, InterruptedException, ClassNotFoundException,
- InstantiationException, IllegalAccessException {
+ boolean runSequential)
+ throws IOException, InterruptedException, ClassNotFoundException, InstantiationException, IllegalAccessException {
if (runSequential) {
return buildClustersSeq(input, clustersIn, output, measure, maxIterations, delta);
} else {
@@ -220,10 +237,11 @@ public class KMeansDriver extends Abstra
Path output,
DistanceMeasure measure,
int maxIterations,
- String delta) throws InstantiationException, IllegalAccessException, IOException {
+ String delta)
+ throws InstantiationException, IllegalAccessException, IOException {
KMeansClusterer clusterer = new KMeansClusterer(measure);
- List<Cluster> clusters = new ArrayList<Cluster>();
+ Collection<Cluster> clusters = new ArrayList<Cluster>();
KMeansUtil.configureWithClusterInfo(clustersIn, clusters);
if (clusters.isEmpty()) {
@@ -315,7 +333,8 @@ public class KMeansDriver extends Abstra
Path clustersIn,
Path clustersOut,
String measureClass,
- String convergenceDelta) throws IOException, InterruptedException, ClassNotFoundException {
+ String convergenceDelta)
+ throws IOException, InterruptedException, ClassNotFoundException {
conf.set(KMeansConfigKeys.CLUSTER_PATH_KEY, clustersIn.toString());
conf.set(KMeansConfigKeys.DISTANCE_MEASURE_KEY, measureClass);
@@ -408,8 +427,8 @@ public class KMeansDriver extends Abstra
Path output,
DistanceMeasure measure,
String convergenceDelta,
- boolean runSequential) throws IOException, InterruptedException, ClassNotFoundException,
- InstantiationException, IllegalAccessException {
+ boolean runSequential)
+ throws IOException, InterruptedException, ClassNotFoundException, InstantiationException, IllegalAccessException {
if (log.isInfoEnabled()) {
log.info("Running Clustering");
@@ -423,11 +442,11 @@ public class KMeansDriver extends Abstra
}
}
- private static void clusterDataSeq(Path input, Path clustersIn, Path output, DistanceMeasure measure) throws IOException,
- InterruptedException, InstantiationException, IllegalAccessException {
+ private static void clusterDataSeq(Path input, Path clustersIn, Path output, DistanceMeasure measure)
+ throws IOException, InstantiationException, IllegalAccessException {
KMeansClusterer clusterer = new KMeansClusterer(measure);
- List<Cluster> clusters = new ArrayList<Cluster>();
+ Collection<Cluster> clusters = new ArrayList<Cluster>();
KMeansUtil.configureWithClusterInfo(clustersIn, clusters);
if (clusters.isEmpty()) {
throw new IllegalStateException("Clusters is empty!");
@@ -463,7 +482,8 @@ public class KMeansDriver extends Abstra
Path clustersIn,
Path output,
DistanceMeasure measure,
- String convergenceDelta) throws IOException, InterruptedException, ClassNotFoundException {
+ String convergenceDelta)
+ throws IOException, InterruptedException, ClassNotFoundException {
conf.set(KMeansConfigKeys.CLUSTER_PATH_KEY, clustersIn.toString());
conf.set(KMeansConfigKeys.DISTANCE_MEASURE_KEY, measure.getClass().getName());
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansMapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansMapper.java?rev=1032979&r1=1032978&r2=1032979&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansMapper.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansMapper.java Tue Nov 9 13:19:26 2010
@@ -19,7 +19,6 @@ package org.apache.mahout.clustering.kme
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
-import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
@@ -34,7 +33,7 @@ public class KMeansMapper extends Mapper
private KMeansClusterer clusterer;
- private final List<Cluster> clusters = new ArrayList<Cluster>();
+ private final Collection<Cluster> clusters = new ArrayList<Cluster>();
@Override
protected void map(WritableComparable<?> key, VectorWritable point, Context context)
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansReducer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansReducer.java?rev=1032979&r1=1032978&r2=1032979&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansReducer.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansReducer.java Tue Nov 9 13:19:26 2010
@@ -18,8 +18,8 @@ package org.apache.mahout.clustering.kme
import java.io.IOException;
import java.util.ArrayList;
+import java.util.Collection;
import java.util.HashMap;
-import java.util.List;
import java.util.Map;
import org.apache.hadoop.conf.Configuration;
@@ -60,7 +60,7 @@ public class KMeansReducer extends Reduc
String path = conf.get(KMeansConfigKeys.CLUSTER_PATH_KEY);
if (path.length() > 0) {
- List<Cluster> clusters = new ArrayList<Cluster>();
+ Collection<Cluster> clusters = new ArrayList<Cluster>();
KMeansUtil.configureWithClusterInfo(new Path(path), clusters);
setClusterMap(clusters);
if (clusterMap.isEmpty()) {
@@ -76,7 +76,7 @@ public class KMeansReducer extends Reduc
}
}
- private void setClusterMap(List<Cluster> clusters) {
+ private void setClusterMap(Collection<Cluster> clusters) {
clusterMap = new HashMap<String, Cluster>();
for (Cluster cluster : clusters) {
clusterMap.put(cluster.getIdentifier(), cluster);
@@ -84,7 +84,7 @@ public class KMeansReducer extends Reduc
clusters.clear();
}
- public void setup(List<Cluster> clusters, DistanceMeasure measure) {
+ public void setup(Collection<Cluster> clusters, DistanceMeasure measure) {
setClusterMap(clusters);
this.clusterer = new KMeansClusterer(measure);
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansUtil.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansUtil.java?rev=1032979&r1=1032978&r2=1032979&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansUtil.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansUtil.java Tue Nov 9 13:19:26 2010
@@ -19,7 +19,7 @@ package org.apache.mahout.clustering.kme
import java.io.IOException;
import java.util.ArrayList;
-import java.util.List;
+import java.util.Collection;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
@@ -42,12 +42,12 @@ final class KMeansUtil {
}
/** Configure the mapper with the cluster info */
- public static void configureWithClusterInfo(Path clusterPathStr, List<Cluster> clusters) {
+ public static void configureWithClusterInfo(Path clusterPathStr, Collection<Cluster> clusters) {
// Get the path location where the cluster Info is stored
Configuration conf = new Configuration();
Path clusterPath = new Path(clusterPathStr, "*");
- List<Path> result = new ArrayList<Path>();
+ Collection<Path> result = new ArrayList<Path>();
// filter out the files
PathFilter clusterFileFilter = new PathFilter() {
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyClusterMapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyClusterMapper.java?rev=1032979&r1=1032978&r2=1032979&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyClusterMapper.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyClusterMapper.java Tue Nov 9 13:19:26 2010
@@ -28,6 +28,7 @@ import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.mahout.clustering.WeightedVectorWritable;
@@ -75,7 +76,7 @@ public class MeanShiftCanopyClusterMappe
for (FileStatus s : status) {
SequenceFile.Reader reader = new SequenceFile.Reader(fs, s.getPath(), conf);
try {
- Text key = new Text();
+ Writable key = new Text();
MeanShiftCanopy canopy = new MeanShiftCanopy();
while (reader.next(key, canopy)) {
canopies.add(canopy);
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyMapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyMapper.java?rev=1032979&r1=1032978&r2=1032979&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyMapper.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyMapper.java Tue Nov 9 13:19:26 2010
@@ -19,7 +19,7 @@ package org.apache.mahout.clustering.mea
import java.io.IOException;
import java.util.ArrayList;
-import java.util.List;
+import java.util.Collection;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableComparable;
@@ -27,7 +27,7 @@ import org.apache.hadoop.mapreduce.Mappe
public class MeanShiftCanopyMapper extends Mapper<WritableComparable<?>,MeanShiftCanopy,Text,MeanShiftCanopy> {
- private final List<MeanShiftCanopy> canopies = new ArrayList<MeanShiftCanopy>();
+ private final Collection<MeanShiftCanopy> canopies = new ArrayList<MeanShiftCanopy>();
private MeanShiftCanopyClusterer clusterer;
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyReducer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyReducer.java?rev=1032979&r1=1032978&r2=1032979&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyReducer.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyReducer.java Tue Nov 9 13:19:26 2010
@@ -19,7 +19,7 @@ package org.apache.mahout.clustering.mea
import java.io.IOException;
import java.util.ArrayList;
-import java.util.List;
+import java.util.Collection;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
@@ -29,7 +29,7 @@ import org.apache.hadoop.mapreduce.Reduc
public class MeanShiftCanopyReducer extends Reducer<Text,MeanShiftCanopy,Text,MeanShiftCanopy> {
- private final List<MeanShiftCanopy> canopies = new ArrayList<MeanShiftCanopy>();
+ private final Collection<MeanShiftCanopy> canopies = new ArrayList<MeanShiftCanopy>();
private MeanShiftCanopyClusterer clusterer;
private boolean allConverged = true;
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/minhash/HashFactory.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/minhash/HashFactory.java?rev=1032979&r1=1032978&r2=1032979&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/minhash/HashFactory.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/minhash/HashFactory.java Tue Nov 9 13:19:26 2010
@@ -65,7 +65,7 @@ public class HashFactory {
@Override
public int hash(byte[] bytes) {
long hashValue = 31;
- for (byte byteVal : bytes) {
+ for (long byteVal : bytes) {
hashValue *= seedA * byteVal;
hashValue += seedB;
}
@@ -87,7 +87,7 @@ public class HashFactory {
@Override
public int hash(byte[] bytes) {
long hashValue = 31;
- for (byte byteVal : bytes) {
+ for (long byteVal : bytes) {
hashValue *= seedA * (byteVal >> 4);
hashValue += seedB * byteVal + seedC;
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/common/IntTuple.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/common/IntTuple.java?rev=1032979&r1=1032978&r2=1032979&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/common/IntTuple.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/common/IntTuple.java Tue Nov 9 13:19:26 2010
@@ -155,8 +155,13 @@ public final class IntTuple implements W
int otherLength = otherTuple.length();
int min = Math.min(thisLength, otherLength);
for (int i = 0; i < min; i++) {
- if (this.tuple.get(i) == otherTuple.at(i)) return 0;
- return this.tuple.get(i) - otherTuple.at(i);
+ int a = this.tuple.get(i);
+ int b = otherTuple.at(i);
+ if (a < b) {
+ return -1;
+ } else if (a > b) {
+ return 1;
+ }
}
if (thisLength < otherLength) {
return -1;
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/df/builder/DefaultTreeBuilder.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/df/builder/DefaultTreeBuilder.java?rev=1032979&r1=1032978&r2=1032979&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/df/builder/DefaultTreeBuilder.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/df/builder/DefaultTreeBuilder.java Tue Nov 9 13:19:26 2010
@@ -185,8 +185,7 @@ public class DefaultTreeBuilder implemen
boolean[] cloned = new boolean[selected.length];
for (int i = 0; i < selected.length; i++) {
- if (dataset.isNumerical(i)) cloned[i] = false;
- else cloned[i] = selected[i];
+ cloned[i] = !dataset.isNumerical(i) && selected[i];
}
return cloned;
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/df/data/Data.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/df/data/Data.java?rev=1032979&r1=1032978&r2=1032979&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/df/data/Data.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/df/data/Data.java Tue Nov 9 13:19:26 2010
@@ -19,10 +19,10 @@ package org.apache.mahout.df.data;
import java.io.IOException;
import java.util.ArrayList;
+import java.util.Collection;
import java.util.HashSet;
import java.util.List;
import java.util.Random;
-import java.util.Set;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
@@ -225,7 +225,7 @@ public class Data implements Cloneable {
* @param attr
*/
public double[] values(int attr) {
- Set<Double> result = new HashSet<Double>();
+ Collection<Double> result = new HashSet<Double>();
for (Instance instance : instances) {
result.add(instance.get(attr));
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/df/mapreduce/Classifier.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/df/mapreduce/Classifier.java?rev=1032979&r1=1032978&r2=1032979&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/df/mapreduce/Classifier.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/df/mapreduce/Classifier.java Tue Nov 9 13:19:26 2010
@@ -17,6 +17,7 @@
package org.apache.mahout.df.mapreduce;
+import org.apache.hadoop.io.Writable;
import org.apache.mahout.common.HadoopUtil;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -152,7 +153,7 @@ public class Classifier {
* Will compute the ConfusionMatrix if necessary.
* @param job
*/
- private void parseOutput(Job job) throws IOException {
+ private void parseOutput(JobContext job) throws IOException {
Configuration conf = job.getConfiguration();
FileSystem fs = mappersOutputPath.getFileSystem(conf);
@@ -161,7 +162,7 @@ public class Classifier {
// read all the output
LongWritable key = new LongWritable();
- Text value = new Text();
+ Writable value = new Text();
for (Path path : outfiles) {
SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf);
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/df/tools/Describe.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/df/tools/Describe.java?rev=1032979&r1=1032978&r2=1032979&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/df/tools/Describe.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/df/tools/Describe.java Tue Nov 9 13:19:26 2010
@@ -99,8 +99,8 @@ public final class Describe {
}
}
- private static void runTool(String dataPath, List<String> description, String filePath) throws DescriptorException,
- IOException {
+ private static void runTool(String dataPath, Iterable<String> description, String filePath)
+ throws DescriptorException, IOException {
log.info("Generating the descriptor...");
String descriptor = DescriptorUtils.generateDescriptor(description);
@@ -113,8 +113,7 @@ public final class Describe {
DFUtils.storeWritable(new Configuration(), fPath, dataset);
}
- private static Dataset generateDataset(String descriptor, String dataPath) throws IOException,
- DescriptorException {
+ private static Dataset generateDataset(String descriptor, String dataPath) throws IOException, DescriptorException {
Path path = new Path(dataPath);
FileSystem fs = path.getFileSystem(new Configuration());
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/df/tools/FrequenciesJob.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/df/tools/FrequenciesJob.java?rev=1032979&r1=1032978&r2=1032979&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/df/tools/FrequenciesJob.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/df/tools/FrequenciesJob.java Tue Nov 9 13:19:26 2010
@@ -141,7 +141,7 @@ public class FrequenciesJob {
Frequencies[] values = new Frequencies[numMaps];
// read all the outputs
- LongWritable key = new LongWritable();
+ Writable key = new LongWritable();
Frequencies value = new Frequencies();
int index = 0;
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/driver/MahoutDriver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/driver/MahoutDriver.java?rev=1032979&r1=1032978&r2=1032979&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/driver/MahoutDriver.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/driver/MahoutDriver.java Tue Nov 9 13:19:26 2010
@@ -100,11 +100,9 @@ public final class MahoutDriver {
try {
mainClasses.load(propsStream);
- } catch (Throwable e) {
+ } catch (IOException e) {
//try getting the default one
- propsStream = Thread.currentThread()
- .getContextClassLoader()
- .getResourceAsStream("driver.classes.default.props");
+ propsStream = Thread.currentThread().getContextClassLoader().getResourceAsStream("driver.classes.default.props");
mainClasses.load(propsStream);
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/FPGrowthDriver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/FPGrowthDriver.java?rev=1032979&r1=1032978&r2=1032979&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/FPGrowthDriver.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/FPGrowthDriver.java Tue Nov 9 13:19:26 2010
@@ -20,9 +20,9 @@ package org.apache.mahout.fpm.pfpgrowth;
import java.io.File;
import java.io.IOException;
import java.nio.charset.Charset;
+import java.util.Collection;
import java.util.HashSet;
import java.util.List;
-import java.util.Set;
import org.apache.commons.cli2.CommandLine;
import org.apache.commons.cli2.Group;
@@ -189,7 +189,7 @@ public final class FPGrowthDriver {
SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf, path, Text.class, TopKStringPatterns.class);
FPGrowth<String> fp = new FPGrowth<String>();
- Set<String> features = new HashSet<String>();
+ Collection<String> features = new HashSet<String>();
fp.generateTopKFrequentPatterns(
new StringRecordIterator(new FileLineIterable(new File(input), encoding, false), pattern),
@@ -205,8 +205,7 @@ public final class FPGrowthDriver {
List<Pair<String,TopKStringPatterns>> frequentPatterns = FPGrowth.readFrequentPattern(fs, conf, path);
for (Pair<String,TopKStringPatterns> entry : frequentPatterns) {
- log.info("Dumping Patterns for Feature: {} \n{}", entry.getFirst(), entry.getSecond()
- .toString());
+ log.info("Dumping Patterns for Feature: {} \n{}", entry.getFirst(), entry.getSecond());
}
}
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/ParallelFPGrowthMapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/ParallelFPGrowthMapper.java?rev=1032979&r1=1032978&r2=1032979&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/ParallelFPGrowthMapper.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/ParallelFPGrowthMapper.java Tue Nov 9 13:19:26 2010
@@ -18,11 +18,11 @@
package org.apache.mahout.fpm.pfpgrowth;
import java.io.IOException;
+import java.util.Collection;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map.Entry;
-import java.util.Set;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.mapreduce.Mapper;
@@ -49,7 +49,7 @@ public class ParallelFPGrowthMapper exte
Pair<List<Integer>,Long> pattern = it.next();
Integer[] prunedItems = pattern.getFirst().toArray(new Integer[pattern.getFirst().size()]);
- Set<Long> groups = new HashSet<Long>();
+ Collection<Long> groups = new HashSet<Long>();
for (int j = prunedItems.length - 1; j >= 0; j--) { // generate group
// dependent
// shards
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/TransactionSortingMapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/TransactionSortingMapper.java?rev=1032979&r1=1032978&r2=1032979&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/TransactionSortingMapper.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/TransactionSortingMapper.java Tue Nov 9 13:19:26 2010
@@ -23,7 +23,6 @@ import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
-import java.util.Set;
import java.util.regex.Pattern;
import org.apache.hadoop.io.LongWritable;
@@ -49,7 +48,7 @@ public class TransactionSortingMapper ex
InterruptedException {
String[] items = splitter.split(input.toString());
- Set<String> uniqueItems = new HashSet<String>(Arrays.asList(items));
+ Iterable<String> uniqueItems = new HashSet<String>(Arrays.asList(items));
List<Integer> itemSet = new ArrayList<Integer>();
for (String item : uniqueItems) { // remove items not in the fList
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/TransactionTree.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/TransactionTree.java?rev=1032979&r1=1032978&r2=1032979&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/TransactionTree.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/TransactionTree.java Tue Nov 9 13:19:26 2010
@@ -182,7 +182,7 @@ public final class TransactionTree imple
return false;
}
- public int addPattern(List<Integer> myList, long addCount) {
+ public int addPattern(Iterable<Integer> myList, long addCount) {
int temp = ROOTNODEID;
int ret = 0;
boolean addCountMode = true;