You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by sr...@apache.org on 2008/08/23 18:48:31 UTC
svn commit: r688364 [1/3] - in /lucene/mahout/trunk:
core/src/main/java/org/apache/mahout/classifier/
core/src/main/java/org/apache/mahout/classifier/bayes/
core/src/main/java/org/apache/mahout/classifier/bayes/common/
core/src/main/java/org/apache/mah...
Author: srowen
Date: Sat Aug 23 09:48:28 2008
New Revision: 688364
URL: http://svn.apache.org/viewvc?rev=688364&view=rev
Log:
Many more small tweaks following conversations on the mailing list. Mostly optimizing/reordering imports.
Modified:
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/BayesFileFormatter.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/Classify.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/ConfusionMatrix.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/ResultAnalyzer.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/BayesClassifier.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/BayesDriver.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/BayesModel.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/BayesThetaNormalizerDriver.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/BayesThetaNormalizerMapper.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/common/BayesFeatureDriver.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/common/BayesFeatureMapper.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/common/BayesFeatureOutputFormat.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/common/BayesTfIdfDriver.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/common/BayesTfIdfMapper.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/common/BayesTfIdfOutputFormat.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/common/BayesWeightSummerDriver.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/common/BayesWeightSummerMapper.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/common/BayesWeightSummerOutputFormat.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/io/SequenceFileModelReader.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/cbayes/CBayesClassifier.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/cbayes/CBayesDriver.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/cbayes/CBayesModel.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/cbayes/CBayesNormalizedWeightDriver.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/cbayes/CBayesNormalizedWeightMapper.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/cbayes/CBayesNormalizedWeightReducer.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/cbayes/CBayesThetaDriver.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/cbayes/CBayesThetaMapper.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/cbayes/CBayesThetaNormalizerDriver.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/cbayes/CBayesThetaNormalizerMapper.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/Canopy.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyCombiner.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyDriver.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyMapper.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyReducer.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/ClusterDriver.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/ClusterMapper.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/Cluster.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansCombiner.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansDriver.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansMapper.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansReducer.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopy.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyCombiner.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyDriver.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyMapper.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyReducer.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/Classifier.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/Model.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/ga/watchmaker/EvalMapper.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/ga/watchmaker/MahoutEvaluator.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/ga/watchmaker/MahoutFitnessEvaluator.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/ga/watchmaker/OutputUtils.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/ga/watchmaker/STEvolutionEngine.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/ga/watchmaker/STFitnessEvaluator.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/matrix/AbstractVector.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/matrix/DenseVector.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/matrix/SparseMatrix.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/matrix/SparseVector.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/matrix/Vector.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/utils/WeightedEuclideanDistanceMeasure.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/utils/WeightedManhattanDistanceMeasure.java
lucene/mahout/trunk/core/src/test/java/org/apache/mahout/classifier/bayes/BayesClassifierTest.java
lucene/mahout/trunk/core/src/test/java/org/apache/mahout/classifier/bayes/BayesFileFormatterTest.java
lucene/mahout/trunk/core/src/test/java/org/apache/mahout/classifier/bayes/CBayesClassifierTest.java
lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/canopy/TestCanopyCreation.java
lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/canopy/VisibleCanopy.java
lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/kmeans/TestKmeansClustering.java
lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/kmeans/VisibleCluster.java
lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/meanshift/TestMeanShift.java
lucene/mahout/trunk/core/src/test/java/org/apache/mahout/ga/watchmaker/EvalMapperTest.java
lucene/mahout/trunk/core/src/test/java/org/apache/mahout/ga/watchmaker/MahoutEvaluatorTest.java
lucene/mahout/trunk/core/src/test/java/org/apache/mahout/ga/watchmaker/STFitnessEvaluatorTest.java
lucene/mahout/trunk/core/src/test/java/org/apache/mahout/ga/watchmaker/utils/DummyEvaluator.java
lucene/mahout/trunk/core/src/test/java/org/apache/mahout/matrix/TestDenseMatrix.java
lucene/mahout/trunk/core/src/test/java/org/apache/mahout/matrix/TestDenseVector.java
lucene/mahout/trunk/core/src/test/java/org/apache/mahout/matrix/TestSparseColumnMatrix.java
lucene/mahout/trunk/core/src/test/java/org/apache/mahout/matrix/TestSparseMatrix.java
lucene/mahout/trunk/core/src/test/java/org/apache/mahout/matrix/TestSparseRowMatrix.java
lucene/mahout/trunk/core/src/test/java/org/apache/mahout/utils/DummyOutputCollector.java
lucene/mahout/trunk/core/src/test/java/org/apache/mahout/utils/StringUtilsTest.java
lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/ejb/RecommenderEJB.java
lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/ejb/RecommenderEJBBean.java
lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/ejb/RecommenderEJBLocal.java
lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/grouplens/GroupLensDataModel.java
lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/grouplens/GroupLensRecommender.java
lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/PrepareTwentyNewsgroups.java
lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/TestClassifier.java
lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/TrainClassifier.java
lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/WikipediaDatasetCreator.java
lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/WikipediaDatasetCreatorDriver.java
lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/WikipediaDatasetCreatorMapper.java
lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/WikipediaXmlSplitter.java
lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/XmlInputFormat.java
lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/canopy/InputMapper.java
lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/canopy/OutputMapper.java
lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/kmeans/OutputMapper.java
lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/InputMapper.java
lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/Job.java
lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/OutputMapper.java
lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/CDCrossover.java
lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/CDFactory.java
lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/CDFitness.java
lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/CDFitnessEvaluator.java
lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/CDGA.java
lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/CDMutation.java
lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/CDRule.java
lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/FileInfoParser.java
lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/hadoop/CDMahoutEvaluator.java
lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/hadoop/CDMapper.java
lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/hadoop/CDReducer.java
lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/hadoop/DatasetSplit.java
lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/tool/CDInfosTool.java
lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/tool/ToolCombiner.java
lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/tool/ToolMapper.java
lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/tool/ToolReducer.java
lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/travellingsalesman/BruteForceTravellingSalesman.java
lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/travellingsalesman/EvolutionaryTravellingSalesman.java
lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/travellingsalesman/RouteEvaluator.java
lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/CDCrossoverTest.java
lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/CDFitnessTest.java
lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/CDMutationTest.java
lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/CDRuleTest.java
lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/FileInfosDatasetTest.java
lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/hadoop/CDMahoutEvaluatorTest.java
lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/hadoop/CDMapperTest.java
lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/hadoop/CDReducerTest.java
lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/hadoop/DatasetSplitTest.java
lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/tool/CDInfosToolTest.java
lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/tool/DescriptionUtilsTest.java
lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/tool/ToolCombinerTest.java
lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/tool/ToolMapperTest.java
lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/tool/ToolReducerTest.java
lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/utils/MockDataSet.java
lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/utils/RandomRule.java
lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/utils/RandomRuleResults.java
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/BayesFileFormatter.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/BayesFileFormatter.java?rev=688364&r1=688363&r2=688364&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/BayesFileFormatter.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/BayesFileFormatter.java Sat Aug 23 09:48:28 2008
@@ -17,21 +17,30 @@
* limitations under the License.
*/
+import org.apache.commons.cli.CommandLine;
+import org.apache.commons.cli.Option;
+import org.apache.commons.cli.OptionBuilder;
+import org.apache.commons.cli.Options;
+import org.apache.commons.cli.ParseException;
+import org.apache.commons.cli.PosixParser;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
-import org.apache.commons.cli.Options;
-import org.apache.commons.cli.CommandLine;
-import org.apache.commons.cli.ParseException;
-import org.apache.commons.cli.PosixParser;
-import org.apache.commons.cli.Option;
-import org.apache.commons.cli.OptionBuilder;
-import java.io.*;
+import java.io.File;
+import java.io.FileFilter;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.io.OutputStreamWriter;
+import java.io.Reader;
+import java.io.Writer;
import java.nio.charset.Charset;
-import java.util.*;
+import java.util.ArrayList;
+import java.util.List;
/**
* Flatten a file into format that can be read by the Bayes M/R job. <p/> One
@@ -39,7 +48,8 @@
* line are the terms.
*/
public class BayesFileFormatter {
- private static String LINE_SEP = System.getProperty("line.separator");
+
+ private static final String LINE_SEP = System.getProperty("line.separator");
/**
* Collapse all the files in the inputDir into a single file in the proper
@@ -90,13 +100,13 @@
* directories and don't have to loop the list twice
*/
private static class FileProcessor implements FileFilter {
- private String label;
+ private final String label;
- private Analyzer analyzer;
+ private final Analyzer analyzer;
private File outputDir;
- private Charset charset;
+ private final Charset charset;
private Writer writer;
@@ -180,7 +190,8 @@
// TextInputFormat
Token token = new Token();
CharArraySet seen = new CharArraySet(256, false);
- long numTokens = 0;
+ // TODO srowen wonders that 'seen' is updated but not used?
+ //long numTokens = 0;
while ((token = ts.next(token)) != null) {
char[] termBuffer = token.termBuffer();
int termLen = token.termLength();
@@ -191,7 +202,7 @@
System.arraycopy(termBuffer, 0, tmp, 0, termLen);
seen.add(tmp);// do this b/c CharArraySet doesn't allow offsets
}
- numTokens++;
+ ///numTokens++;
}
@@ -207,7 +218,7 @@
throws IOException {
TokenStream ts = analyzer.tokenStream("", reader);
- Token token = null;
+ Token token;
List<String> coll = new ArrayList<String>();
while ((token = ts.next()) != null) {
char[] termBuffer = token.termBuffer();
@@ -215,7 +226,7 @@
String val = new String(termBuffer, 0, termLen);
coll.add(val);
}
- return (String[]) coll.toArray(new String[coll.size()]);
+ return coll.toArray(new String[coll.size()]);
}
/**
@@ -259,18 +270,18 @@
Option helpOpt = OptionBuilder.withLongOpt("help").withDescription(
"Print out help info").create("h");
options.addOption(helpOpt);
- CommandLine cmdLine = null;
+ CommandLine cmdLine;
try {
PosixParser parser = new PosixParser();
cmdLine = parser.parse(options, args);
if (cmdLine.hasOption(helpOpt.getOpt())) {
System.out.println("Options: " + options);
- System.exit(0);
+ return;
}
File input = new File(cmdLine.getOptionValue(inputOpt.getOpt()));
File output = new File(cmdLine.getOptionValue(outputOpt.getOpt()));
String label = cmdLine.getOptionValue(labelOpt.getOpt());
- Analyzer analyzer = null;
+ Analyzer analyzer;
if (cmdLine.hasOption(analyzerOpt.getOpt())) {
analyzer = (Analyzer) Class.forName(
cmdLine.getOptionValue(analyzerOpt.getOpt())).newInstance();
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/Classify.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/Classify.java?rev=688364&r1=688363&r2=688364&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/Classify.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/Classify.java Sat Aug 23 09:48:28 2008
@@ -16,12 +16,17 @@
* limitations under the License.
*/
-import org.apache.commons.cli.Options;
-import org.apache.commons.cli.PosixParser;
import org.apache.commons.cli.CommandLine;
-import org.apache.commons.cli.ParseException;
-import org.apache.commons.cli.OptionBuilder;
import org.apache.commons.cli.Option;
+import org.apache.commons.cli.OptionBuilder;
+import org.apache.commons.cli.Options;
+import org.apache.commons.cli.ParseException;
+import org.apache.commons.cli.PosixParser;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.mahout.classifier.bayes.BayesClassifier;
import org.apache.mahout.classifier.bayes.BayesModel;
import org.apache.mahout.classifier.bayes.io.SequenceFileModelReader;
@@ -29,17 +34,14 @@
import org.apache.mahout.classifier.cbayes.CBayesModel;
import org.apache.mahout.common.Classifier;
import org.apache.mahout.common.Model;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.lucene.analysis.standard.StandardAnalyzer;
-import org.apache.lucene.analysis.Analyzer;
-import java.io.IOException;
import java.io.File;
-import java.io.InputStreamReader;
import java.io.FileInputStream;
-import java.util.*;
+import java.io.InputStreamReader;
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
/**
@@ -67,7 +69,7 @@
options.addOption(typeOpt);
- CommandLine cmdLine = null;
+ CommandLine cmdLine;
try {
PosixParser parser = new PosixParser();
cmdLine = parser.parse(options, args);
@@ -146,7 +148,7 @@
List<String> doc = Model.generateNGramsWithoutLabel(line.toString(), gramSize) ;
System.out.println("Done converting");
System.out.println("Classifying document: " + docPath);
- ClassifierResult category = classifier.classify(model, doc.toArray(new String[]{}), defaultCat);
+ ClassifierResult category = classifier.classify(model, doc.toArray(new String[doc.size()]), defaultCat);
System.out.println("Category for " + docPath + " is " + category);
}
catch (ParseException exp) {
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/ConfusionMatrix.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/ConfusionMatrix.java?rev=688364&r1=688363&r2=688364&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/ConfusionMatrix.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/ConfusionMatrix.java Sat Aug 23 09:48:28 2008
@@ -1,9 +1,12 @@
package org.apache.mahout.classifier;
-import java.util.*;
-
import org.apache.commons.lang.StringUtils;
import org.apache.mahout.common.Summarizable;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.Map;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with this
@@ -28,7 +31,7 @@
Collection<String> labels = new ArrayList<String>();
- Map<String, Integer> labelMap = new HashMap<String, Integer>();
+ final Map<String, Integer> labelMap = new HashMap<String, Integer>();
int[][] confusionMatrix = null;
@@ -53,10 +56,7 @@
public int getCorrect(String label){
int labelId = labelMap.get(label).intValue();
- int correct = 0;
- correct = confusionMatrix[labelId][labelId];
-
- return correct;
+ return confusionMatrix[labelId][labelId];
}
public float getTotal(String label){
@@ -123,8 +123,8 @@
if (this.getLabels().size() != b.getLabels().size())
throw new Exception("The Labels do not Match");
- if (this.getLabels().containsAll(b.getLabels()))
- ;
+ //if (this.getLabels().containsAll(b.getLabels()))
+ // ;
for (String correctLabel : this.labels) {
for (String classifiedLabel : this.labels) {
incrementCount(correctLabel, classifiedLabel, b.getCount(correctLabel,
@@ -144,8 +144,7 @@
for (String correctLabel : this.labels) {
returnString.append(StringUtils.rightPad(getSmallLabel(labelMap.get(
- correctLabel).intValue()), 5)
- + "\t");
+ correctLabel).intValue()), 5)).append('\t');
}
returnString.append("<--Classified as\n");
@@ -153,16 +152,13 @@
for (String correctLabel : this.labels) {
Integer labelTotal = 0;
for (String classifiedLabel : this.labels) {
- returnString.append(StringUtils.rightPad(new Integer(getCount(
- correctLabel, classifiedLabel)).toString(), 5)
- + "\t");
+ returnString.append(StringUtils.rightPad(Integer.toString(getCount(
+ correctLabel, classifiedLabel)), 5)).append('\t');
labelTotal+=getCount(correctLabel, classifiedLabel);
}
- returnString.append(" | "
- + StringUtils.rightPad(labelTotal.toString(), 6)
- + "\t"
- + StringUtils.rightPad(getSmallLabel(labelMap.get(correctLabel)
- .intValue()), 5) + " = " + correctLabel + "\n");
+ returnString.append(" | ").append(StringUtils.rightPad(labelTotal.toString(), 6)).append('\t')
+ .append(StringUtils.rightPad(getSmallLabel(labelMap.get(correctLabel).intValue()), 5))
+ .append(" = ").append(correctLabel).append('\n');
}
returnString.append("\n");
return returnString.toString();
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/ResultAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/ResultAnalyzer.java?rev=688364&r1=688363&r2=688364&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/ResultAnalyzer.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/ResultAnalyzer.java Sat Aug 23 09:48:28 2008
@@ -1,11 +1,11 @@
package org.apache.mahout.classifier;
-import java.text.*;
-import java.util.*;
-
import org.apache.commons.lang.StringUtils;
import org.apache.mahout.common.Summarizable;
+import java.text.DecimalFormat;
+import java.util.Collection;
+
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with this
@@ -74,25 +74,15 @@
DecimalFormat decimalFormatter = new DecimalFormat("0.####");
returnString.append(StringUtils.rightPad("Correctly Classified Instances",
- 40)
- + ": "
- + StringUtils.leftPad(new Integer(correctlyClassified).toString(), 10)
- + "\t"
- + StringUtils.leftPad(decimalFormatter.format(percentageCorrect), 10)
- + "%\n");
+ 40)).append(": ").append(StringUtils.leftPad(Integer.toString(correctlyClassified), 10))
+ .append('\t').append(StringUtils.leftPad(decimalFormatter.format(percentageCorrect), 10)).append("%\n");
returnString.append(StringUtils.rightPad(
- "Incorrectly Classified Instances", 40)
- + ": "
- + StringUtils
- .leftPad(new Integer(incorrectlyClassified).toString(), 10)
- + "\t"
- + StringUtils.leftPad(decimalFormatter.format(percentageIncorrect), 10)
- + "%\n");
- returnString.append(StringUtils.rightPad("Total Classified Instances", 40)
- + ": "
- + StringUtils.leftPad(new Integer(totalClassified).toString(), 10)
- + "\n");
- returnString.append("\n");
+ "Incorrectly Classified Instances", 40)).append(": ").append(StringUtils
+ .leftPad(Integer.toString(incorrectlyClassified), 10)).append('\t')
+ .append(StringUtils.leftPad(decimalFormatter.format(percentageIncorrect), 10)).append("%\n");
+ returnString.append(StringUtils.rightPad("Total Classified Instances", 40)).append(": ")
+ .append(StringUtils.leftPad(Integer.toString(totalClassified), 10)).append('\n');
+ returnString.append('\n');
returnString.append(confusionMatrix.summarize());
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/BayesClassifier.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/BayesClassifier.java?rev=688364&r1=688363&r2=688364&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/BayesClassifier.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/BayesClassifier.java Sat Aug 23 09:48:28 2008
@@ -22,9 +22,9 @@
import org.apache.mahout.common.Model;
import java.util.Collection;
-import java.util.Enumeration;
-import java.util.Hashtable;
+import java.util.HashMap;
import java.util.LinkedList;
+import java.util.Map;
/**
@@ -45,7 +45,7 @@
Collection<String> categories = model.getLabels();
PriorityQueue pq = new ClassifierResultPriorityQueue(numResults);
- ClassifierResult tmp = null;
+ ClassifierResult tmp;
for (String category : categories){
float prob = documentProbability(model, category, document);
if (prob < 0) {
@@ -99,8 +99,8 @@
* @see Model#FeatureWeight(String, String)
*/
public float documentProbability(Model model, String label, String[] document) {
- float result = 0f;
- Hashtable<String, Integer> wordList = new Hashtable<String, Integer>(1000);
+ float result = 0.0f;
+ Map<String, Integer> wordList = new HashMap<String, Integer>(1000);
for (String word : document) {
if (wordList.containsKey(word)) {
Integer count = wordList.get(word);
@@ -108,11 +108,11 @@
wordList.put(word, count);
} else {
wordList.put(word, 1);
- }
+ }
}
- for (Enumeration<String> e = wordList.keys(); e.hasMoreElements();) {
- String word = e.nextElement();
- Integer count = wordList.get(word);
+ for (Map.Entry<String, Integer> entry : wordList.entrySet()) {
+ String word = entry.getKey();
+ Integer count = entry.getValue();
result += count * model.FeatureWeight(label, word);
}
return result;
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/BayesDriver.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/BayesDriver.java?rev=688364&r1=688363&r2=688364&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/BayesDriver.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/BayesDriver.java Sat Aug 23 09:48:28 2008
@@ -17,9 +17,9 @@
* limitations under the License.
*/
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.mapred.JobConf;
import org.apache.mahout.classifier.bayes.common.BayesFeatureDriver;
import org.apache.mahout.classifier.bayes.common.BayesTfIdfDriver;
import org.apache.mahout.classifier.bayes.common.BayesWeightSummerDriver;
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/BayesModel.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/BayesModel.java?rev=688364&r1=688363&r2=688364&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/BayesModel.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/BayesModel.java Sat Aug 23 09:48:28 2008
@@ -54,7 +54,7 @@
@Override
protected float getWeightUnprocessed(Integer label, Integer feature) {
- float result = 0.0f;
+ float result;
Map<Integer, Float> featureWeights = featureLabelWeights.get(feature);
if (featureWeights.containsKey(label)) {
@@ -99,6 +99,7 @@
float D_ij = getWeightUnprocessed(label, feature);
float sumLabelWeight = getSumLabelWeight(label);
+ // TODO srowen says sigma_j is unused
float sigma_j = getSumFeatureWeight(feature);
float numerator = D_ij + alpha_i;
@@ -153,8 +154,7 @@
*/
@Override
public float FeatureWeight(Integer label, Integer feature) {
- float weight = getWeight(label, feature);
- return weight;
+ return getWeight(label, feature);
}
}
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/BayesThetaNormalizerDriver.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/BayesThetaNormalizerDriver.java?rev=688364&r1=688363&r2=688364&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/BayesThetaNormalizerDriver.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/BayesThetaNormalizerDriver.java Sat Aug 23 09:48:28 2008
@@ -16,20 +16,21 @@
* limitations under the License.
*/
-import java.util.HashMap;
-
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.DefaultStringifier;
+import org.apache.hadoop.io.FloatWritable;
+import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.SequenceFileInputFormat;
import org.apache.hadoop.mapred.SequenceFileOutputFormat;
import org.apache.hadoop.util.GenericsUtil;
-import org.apache.hadoop.io.DefaultStringifier;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.io.FloatWritable;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.fs.FileSystem;
import org.apache.mahout.classifier.bayes.io.SequenceFileModelReader;
+import java.util.HashMap;
+import java.util.Map;
+
/**
* Create and run the Bayes Theta Normalization Step.
@@ -56,7 +57,6 @@
*
* @param input the input pathname String
* @param output the output pathname String
-
*/
public static void runJob(String input, String output) {
JobClient client = new JobClient();
@@ -90,7 +90,7 @@
String labelWeightSumString = mapStringifier.toString(labelWeightSum);
System.out.println("Sigma_k for Each Label");
- HashMap<String,Float> c = mapStringifier.fromString(labelWeightSumString);
+ Map<String,Float> c = mapStringifier.fromString(labelWeightSumString);
System.out.println(c);
conf.set("cnaivebayes.sigma_k", labelWeightSumString);
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/BayesThetaNormalizerMapper.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/BayesThetaNormalizerMapper.java?rev=688364&r1=688363&r2=688364&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/BayesThetaNormalizerMapper.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/BayesThetaNormalizerMapper.java Sat Aug 23 09:48:28 2008
@@ -28,12 +28,8 @@
import org.apache.hadoop.util.GenericsUtil;
import java.io.IOException;
-import java.util.*;
+import java.util.HashMap;
-/**
- *
- *
- */
public class BayesThetaNormalizerMapper extends MapReduceBase implements
Mapper<Text, FloatWritable, Text, FloatWritable> {
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/common/BayesFeatureDriver.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/common/BayesFeatureDriver.java?rev=688364&r1=688363&r2=688364&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/common/BayesFeatureDriver.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/common/BayesFeatureDriver.java Sat Aug 23 09:48:28 2008
@@ -16,12 +16,14 @@
* limitations under the License.
*/
-import org.apache.hadoop.mapred.*;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.DefaultStringifier;
-import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.FloatWritable;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapred.JobClient;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.KeyValueTextInputFormat;
/**
@@ -49,22 +51,19 @@
*
* @param input the input pathname String
* @param output the output pathname String
-
*/
-
+
@SuppressWarnings("deprecation")
public static void runJob(String input, String output, int gramSize) {
JobClient client = new JobClient();
JobConf conf = new JobConf(BayesFeatureDriver.class);
-
+
conf.setOutputKeyClass(Text.class);
conf.setOutputValueClass(FloatWritable.class);
- //conf.setInputPath(new Path(input));
- FileInputFormat.setInputPaths(conf, new Path(input));
+ conf.setInputPath(new Path(input));
Path outPath = new Path(output);
- //conf.setOutputPath(outPath);
- FileOutputFormat.setOutputPath(conf, outPath);
+ conf.setOutputPath(outPath);
conf.setNumMapTasks(100);
//conf.setNumReduceTasks(1);
conf.setMapperClass(BayesFeatureMapper.class);
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/common/BayesFeatureMapper.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/common/BayesFeatureMapper.java?rev=688364&r1=688363&r2=688364&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/common/BayesFeatureMapper.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/common/BayesFeatureMapper.java Sat Aug 23 09:48:28 2008
@@ -29,11 +29,9 @@
import org.apache.mahout.common.Model;
import java.io.IOException;
-import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.Map;
import java.util.List;
-import java.util.StringTokenizer;
-import java.util.Hashtable;
-import java.util.Enumeration;
/**
* Reads the input train set(preprocessed using the {@link BayesFileFormatter}).
@@ -41,9 +39,9 @@
*/
public class BayesFeatureMapper extends MapReduceBase implements
Mapper<Text, Text, Text, FloatWritable> {
- private final static FloatWritable one = new FloatWritable(1.00f);
+ private final static FloatWritable one = new FloatWritable(1.0f);
- private Text labelWord = new Text();
+ private final Text labelWord = new Text();
private int gramSize = 1;
@@ -63,35 +61,34 @@
OutputCollector<Text, FloatWritable> output, Reporter reporter)
throws IOException {
String line = value.toString();
- StringTokenizer itr = new StringTokenizer(line);
String label = key.toString();
int keyLen = label.length();
- Hashtable<String, Integer> wordList = new Hashtable<String, Integer>(1000);
+ Map<String, Integer> wordList = new HashMap<String, Integer>(1000);
+ // TODO: srowen wonders where wordList is ever updated?
StringBuilder builder = new StringBuilder(label);
builder.ensureCapacity(32);// make sure we have a reasonably size buffer to
// begin with
- List<String> previousN_1Grams = Model.generateNGramsWithoutLabel(line, keyLen);
+ // TODO: srowen says this var isn't used right now
+ List<String> previousN_1Grams = Model.generateNGramsWithoutLabel(line, keyLen);
- Double lengthNormalisation = new Double(0.0d);
- for (Enumeration<String> e = wordList.keys(); e.hasMoreElements();) {
+ double lengthNormalisation = 0.0;
+ for (double D_kj : wordList.values()) {
// key is label,word
- String token = e.nextElement();
- Double D_kj = new Double(wordList.get(token).doubleValue());
- lengthNormalisation += (double) (D_kj * D_kj);
+ lengthNormalisation += D_kj * D_kj;
}
lengthNormalisation = Math.sqrt(lengthNormalisation);
// Ouput Length Normalized + TF Transformed Frequency per Word per Class
// Log(1 + D_ij)/SQRT( SIGMA(k, D_kj) )
- for (Enumeration<String> e = wordList.keys(); e.hasMoreElements();) {
+ for (Map.Entry<String, Integer> entry : wordList.entrySet()) {
// key is label,word
- String token = e.nextElement();
+ String token = entry.getKey();
builder.append(",").append(token);
labelWord.set(builder.toString());
FloatWritable f = new FloatWritable((float) (Math
- .log((double) (1 + wordList.get(token))) / lengthNormalisation));
+ .log(1.0 + entry.getValue()) / lengthNormalisation));
output.collect(labelWord, f);
builder.setLength(keyLen);// truncate back
}
@@ -100,9 +97,8 @@
String dflabel = "-" + label;
int dfKeyLen = dflabel.length();
builder = new StringBuilder(dflabel);
- for (Enumeration<String> e = wordList.keys(); e.hasMoreElements();) {
+ for (String token : wordList.keySet()) {
// key is label,word
- String token = e.nextElement();
builder.append(",").append(token);
labelWord.set(builder.toString());
output.collect(labelWord, one);
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/common/BayesFeatureOutputFormat.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/common/BayesFeatureOutputFormat.java?rev=688364&r1=688363&r2=688364&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/common/BayesFeatureOutputFormat.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/common/BayesFeatureOutputFormat.java Sat Aug 23 09:48:28 2008
@@ -16,10 +16,7 @@
* limitations under the License.
*/
-import java.io.IOException;
-
import org.apache.hadoop.fs.FileSystem;
-
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
@@ -29,6 +26,8 @@
import org.apache.hadoop.mapred.lib.MultipleOutputFormat;
import org.apache.hadoop.util.Progressable;
+import java.io.IOException;
+
/**
* This class extends the MultipleOutputFormat, allowing to write the output data to different output files in sequence file output format.
*/
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/common/BayesTfIdfDriver.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/common/BayesTfIdfDriver.java?rev=688364&r1=688363&r2=688364&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/common/BayesTfIdfDriver.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/common/BayesTfIdfDriver.java Sat Aug 23 09:48:28 2008
@@ -16,19 +16,20 @@
* limitations under the License.
*/
-import java.util.*;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.DefaultStringifier;
+import org.apache.hadoop.io.FloatWritable;
+import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.SequenceFileInputFormat;
import org.apache.hadoop.mapred.SequenceFileOutputFormat;
import org.apache.hadoop.util.GenericsUtil;
-import org.apache.hadoop.io.DefaultStringifier;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.io.FloatWritable;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.fs.FileSystem;
import org.apache.mahout.classifier.bayes.io.SequenceFileModelReader;
+import java.util.HashMap;
+import java.util.Map;
/**
* The Driver which drives the Tf-Idf Generation
@@ -55,7 +56,6 @@
*
* @param input the input pathname String
* @param output the output pathname String
-
*/
public static void runJob(String input, String output) {
JobClient client = new JobClient();
@@ -94,7 +94,7 @@
String labelDocumentCountString = mapStringifier.toString(labelDocumentCounts);
System.out.println("Counts of documents in Each Label");
- HashMap<String,Float> c = mapStringifier.fromString(labelDocumentCountString);
+ Map<String,Float> c = mapStringifier.fromString(labelDocumentCountString);
System.out.println(c);
conf.set("cnaivebayes.labelDocumentCounts", labelDocumentCountString);
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/common/BayesTfIdfMapper.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/common/BayesTfIdfMapper.java?rev=688364&r1=688363&r2=688364&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/common/BayesTfIdfMapper.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/common/BayesTfIdfMapper.java Sat Aug 23 09:48:28 2008
@@ -18,8 +18,8 @@
*/
-import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.DefaultStringifier;
+import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
@@ -28,14 +28,9 @@
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.util.GenericsUtil;
-
import java.io.IOException;
-import java.util.*;
+import java.util.HashMap;
-/**
- *
- *
- */
public class BayesTfIdfMapper extends MapReduceBase implements
Mapper<Text, FloatWritable, Text, FloatWritable> {
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/common/BayesTfIdfOutputFormat.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/common/BayesTfIdfOutputFormat.java?rev=688364&r1=688363&r2=688364&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/common/BayesTfIdfOutputFormat.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/common/BayesTfIdfOutputFormat.java Sat Aug 23 09:48:28 2008
@@ -16,8 +16,6 @@
* limitations under the License.
*/
-import java.io.IOException;
-
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
@@ -28,6 +26,8 @@
import org.apache.hadoop.mapred.lib.MultipleOutputFormat;
import org.apache.hadoop.util.Progressable;
+import java.io.IOException;
+
/**
* This class extends the MultipleOutputFormat, allowing to write the output data to different output files in sequence file output format.
*/
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/common/BayesWeightSummerDriver.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/common/BayesWeightSummerDriver.java?rev=688364&r1=688363&r2=688364&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/common/BayesWeightSummerDriver.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/common/BayesWeightSummerDriver.java Sat Aug 23 09:48:28 2008
@@ -16,14 +16,14 @@
* limitations under the License.
*/
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.FloatWritable;
+import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.SequenceFileInputFormat;
import org.apache.hadoop.mapred.SequenceFileOutputFormat;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.io.FloatWritable;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.fs.FileSystem;
/**
@@ -51,7 +51,6 @@
*
* @param input the input pathname String
* @param output the output pathname String
-
*/
public static void runJob(String input, String output) {
JobClient client = new JobClient();
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/common/BayesWeightSummerMapper.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/common/BayesWeightSummerMapper.java?rev=688364&r1=688363&r2=688364&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/common/BayesWeightSummerMapper.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/common/BayesWeightSummerMapper.java Sat Aug 23 09:48:28 2008
@@ -24,13 +24,9 @@
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;
-
import java.io.IOException;
-/**
- *
- *
- */
+
public class BayesWeightSummerMapper extends MapReduceBase implements
Mapper<Text, FloatWritable, Text, FloatWritable> {
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/common/BayesWeightSummerOutputFormat.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/common/BayesWeightSummerOutputFormat.java?rev=688364&r1=688363&r2=688364&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/common/BayesWeightSummerOutputFormat.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/common/BayesWeightSummerOutputFormat.java Sat Aug 23 09:48:28 2008
@@ -16,8 +16,6 @@
* limitations under the License.
*/
-import java.io.IOException;
-
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
@@ -28,6 +26,8 @@
import org.apache.hadoop.mapred.lib.MultipleOutputFormat;
import org.apache.hadoop.util.Progressable;
+import java.io.IOException;
+
/**
* This class extends the MultipleOutputFormat, allowing to write the output data to different output files in sequence file output format.
*/
@@ -45,6 +45,7 @@
}
return theSequenceFileOutputFormat.getRecordWriter(fs, job, name, arg3);
}
+
@Override
protected String generateFileNameForKeyValue(WritableComparable k, Writable v,
String name) {
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/io/SequenceFileModelReader.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/io/SequenceFileModelReader.java?rev=688364&r1=688363&r2=688364&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/io/SequenceFileModelReader.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/io/SequenceFileModelReader.java Sat Aug 23 09:48:28 2008
@@ -17,19 +17,20 @@
* limitations under the License.
*/
-import org.apache.mahout.common.Model;
+import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
-import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.MapFile;
import org.apache.hadoop.io.SequenceFile;
-import org.apache.hadoop.io.Writable;
-import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.Text;
-import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.io.Writable;
+import org.apache.mahout.common.Model;
import java.io.IOException;
-import java.util.*;
+import java.util.HashMap;
+import java.util.Map;
/**
* This Class reads the different interim files created during the Training stage as well as the Model File during testing.
@@ -207,7 +208,7 @@
} else {
int idx = keyStr.indexOf(",");
if (idx != -1) {
- HashMap<String, Float> data = new HashMap<String, Float>();
+ Map<String, Float> data = new HashMap<String, Float>();
data.put(keyStr.substring(0, idx), new Float(value.get()));
writer.append(new Text(key.toString()), value);
}
@@ -287,8 +288,7 @@
}
}
- Float sigma_jSigma_k = weightSum.get("*");
- return sigma_jSigma_k;
+ return weightSum.get("*");
}
public Float readVocabCount(FileSystem fs, Path pathPattern,
@@ -314,8 +314,7 @@
}
}
- Float sigma_jSigma_k = weightSum.get("*vocabCount");
- return sigma_jSigma_k;
+ return weightSum.get("*vocabCount");
}
}
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/cbayes/CBayesClassifier.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/cbayes/CBayesClassifier.java?rev=688364&r1=688363&r2=688364&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/cbayes/CBayesClassifier.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/cbayes/CBayesClassifier.java Sat Aug 23 09:48:28 2008
@@ -22,9 +22,9 @@
import org.apache.mahout.common.Model;
import java.util.Collection;
-import java.util.Enumeration;
-import java.util.Hashtable;
+import java.util.HashMap;
import java.util.LinkedList;
+import java.util.Map;
/**
* Classifies documents based on a {@link CBayesModel}.
@@ -43,7 +43,7 @@
public Collection<ClassifierResult> classify(Model model, String[] document, String defaultCategory, int numResults) {
Collection<String> categories = model.getLabels();
PriorityQueue pq = new ClassifierResultPriorityQueue(numResults);
- ClassifierResult tmp = null;
+ ClassifierResult tmp;
for (String category : categories){
float prob = documentProbability(model, category, document);
if (prob < 0) {
@@ -97,7 +97,7 @@
*/
public float documentProbability(Model model, String label, String[] document) {
float result = 0.0f;
- Hashtable<String, Integer> wordList = new Hashtable<String, Integer>(1000);
+ Map<String, Integer> wordList = new HashMap<String, Integer>(1000);
for (String word : document) {
if (wordList.containsKey(word)) {
Integer count = wordList.get(word);
@@ -107,9 +107,9 @@
wordList.put(word, 1);
}
}
- for (Enumeration<String> e = wordList.keys(); e.hasMoreElements();) {
- String word = e.nextElement();
- Integer count = wordList.get(word);
+ for (Map.Entry<String, Integer> entry : wordList.entrySet()) {
+ String word = entry.getKey();
+ Integer count = entry.getValue();
result += count * model.FeatureWeight(label, word);
}
return result;
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/cbayes/CBayesDriver.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/cbayes/CBayesDriver.java?rev=688364&r1=688363&r2=688364&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/cbayes/CBayesDriver.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/cbayes/CBayesDriver.java Sat Aug 23 09:48:28 2008
@@ -17,9 +17,9 @@
* limitations under the License.
*/
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.mapred.JobConf;
import org.apache.mahout.classifier.bayes.common.BayesFeatureDriver;
import org.apache.mahout.classifier.bayes.common.BayesTfIdfDriver;
import org.apache.mahout.classifier.bayes.common.BayesWeightSummerDriver;
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/cbayes/CBayesModel.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/cbayes/CBayesModel.java?rev=688364&r1=688363&r2=688364&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/cbayes/CBayesModel.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/cbayes/CBayesModel.java Sat Aug 23 09:48:28 2008
@@ -17,16 +17,10 @@
* limitations under the License.
*/
-
import org.apache.mahout.common.Model;
import java.util.Map;
-
-/**
- *
- *
- */
public class CBayesModel extends Model {
@Override
@@ -51,7 +45,7 @@
@Override
protected float getWeightUnprocessed(Integer label, Integer feature) {
- float result = 0.0f;
+ float result;
Map<Integer, Float> featureWeights = featureLabelWeights.get(feature);
if (featureWeights.containsKey(label)) {
@@ -187,8 +181,7 @@
*/
@Override
public float FeatureWeight(Integer label, Integer feature) {
- float weight = getWeight(label, feature);
- return weight;
+ return getWeight(label, feature);
}
}
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/cbayes/CBayesNormalizedWeightDriver.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/cbayes/CBayesNormalizedWeightDriver.java?rev=688364&r1=688363&r2=688364&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/cbayes/CBayesNormalizedWeightDriver.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/cbayes/CBayesNormalizedWeightDriver.java Sat Aug 23 09:48:28 2008
@@ -16,19 +16,20 @@
* limitations under the License.
*/
-import java.util.*;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.DefaultStringifier;
+import org.apache.hadoop.io.FloatWritable;
+import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.SequenceFileInputFormat;
import org.apache.hadoop.mapred.SequenceFileOutputFormat;
import org.apache.hadoop.util.GenericsUtil;
-import org.apache.hadoop.io.DefaultStringifier;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.io.FloatWritable;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.fs.FileSystem;
import org.apache.mahout.classifier.bayes.io.SequenceFileModelReader;
+import java.util.HashMap;
+import java.util.Map;
/**
* Create and run the Bayes Trainer.
@@ -55,7 +56,6 @@
*
* @param input the input pathname String
* @param output the output pathname String
-
*/
public static void runJob(String input, String output) {
JobClient client = new JobClient();
@@ -105,7 +105,7 @@
DefaultStringifier<HashMap<String,Float>> mapStringifier = new DefaultStringifier<HashMap<String,Float>>(conf, GenericsUtil.getClass(thetaNormalizer));
String thetaNormalizationsString = mapStringifier.toString(thetaNormalizer);
- HashMap<String,Float> c = mapStringifier.fromString(thetaNormalizationsString);
+ Map<String,Float> c = mapStringifier.fromString(thetaNormalizationsString);
System.out.println(c);
conf.set("cnaivebayes.thetaNormalizations", thetaNormalizationsString);
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/cbayes/CBayesNormalizedWeightMapper.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/cbayes/CBayesNormalizedWeightMapper.java?rev=688364&r1=688363&r2=688364&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/cbayes/CBayesNormalizedWeightMapper.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/cbayes/CBayesNormalizedWeightMapper.java Sat Aug 23 09:48:28 2008
@@ -17,8 +17,8 @@
* limitations under the License.
*/
-import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.DefaultStringifier;
+import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
@@ -28,12 +28,9 @@
import org.apache.hadoop.util.GenericsUtil;
import java.io.IOException;
-import java.util.*;
+import java.util.HashMap;
+
-/**
- *
- *
- */
public class CBayesNormalizedWeightMapper extends MapReduceBase implements
Mapper<Text, FloatWritable, Text, FloatWritable> {
@@ -58,7 +55,7 @@
String labelFeaturePair = key.toString();
String label = labelFeaturePair.split(",")[0];
- output.collect(key, new FloatWritable((float)(-1.0f * (float)Math.log(value.get())/thetaNormalizer.get(label))));// output -D_ij
+ output.collect(key, new FloatWritable((-1.0f * (float)Math.log(value.get())/thetaNormalizer.get(label))));// output -D_ij
}
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/cbayes/CBayesNormalizedWeightReducer.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/cbayes/CBayesNormalizedWeightReducer.java?rev=688364&r1=688363&r2=688364&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/cbayes/CBayesNormalizedWeightReducer.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/cbayes/CBayesNormalizedWeightReducer.java Sat Aug 23 09:48:28 2008
@@ -31,12 +31,9 @@
/**
* Can also be used as a local Combiner beacuse only two values should be there inside the values
*
- **/
-
+ */
public class CBayesNormalizedWeightReducer extends MapReduceBase implements Reducer<Text, FloatWritable, Text, FloatWritable> {
-
-
public void reduce(Text key, Iterator<FloatWritable> values, OutputCollector<Text, FloatWritable> output, Reporter reporter) throws IOException {
//Key is label,word, value is the number of times we've seen this label word per local node. Output is the same
String token = key.toString();
@@ -44,7 +41,7 @@
while (values.hasNext()) {
weight += values.next().get();
}
- if(token.equalsIgnoreCase(new String("rec.motorcycles,miller")))
+ if(token.equalsIgnoreCase("rec.motorcycles,miller"))
System.out.println(token + "=>" + weight);
output.collect(key, new FloatWritable(weight));
}
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/cbayes/CBayesThetaDriver.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/cbayes/CBayesThetaDriver.java?rev=688364&r1=688363&r2=688364&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/cbayes/CBayesThetaDriver.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/cbayes/CBayesThetaDriver.java Sat Aug 23 09:48:28 2008
@@ -16,19 +16,20 @@
* limitations under the License.
*/
-import java.util.*;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.DefaultStringifier;
+import org.apache.hadoop.io.FloatWritable;
+import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.SequenceFileInputFormat;
import org.apache.hadoop.mapred.SequenceFileOutputFormat;
import org.apache.hadoop.util.GenericsUtil;
-import org.apache.hadoop.io.DefaultStringifier;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.io.FloatWritable;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.fs.FileSystem;
import org.apache.mahout.classifier.bayes.io.SequenceFileModelReader;
+import java.util.HashMap;
+import java.util.Map;
/**
* Create and run the Bayes Trainer.
@@ -55,7 +56,6 @@
*
* @param input the input pathname String
* @param output the output pathname String
-
*/
public static void runJob(String input, String output) {
JobClient client = new JobClient();
@@ -91,7 +91,7 @@
String labelWeightSumString = mapStringifier.toString(labelWeightSum);
System.out.println("Sigma_k for Each Label");
- HashMap<String,Float> c = mapStringifier.fromString(labelWeightSumString);
+ Map<String,Float> c = mapStringifier.fromString(labelWeightSumString);
System.out.println(c);
conf.set("cnaivebayes.sigma_k", labelWeightSumString);
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/cbayes/CBayesThetaMapper.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/cbayes/CBayesThetaMapper.java?rev=688364&r1=688363&r2=688364&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/cbayes/CBayesThetaMapper.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/cbayes/CBayesThetaMapper.java Sat Aug 23 09:48:28 2008
@@ -17,8 +17,8 @@
* limitations under the License.
*/
-import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.DefaultStringifier;
+import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
@@ -28,20 +28,17 @@
import org.apache.hadoop.util.GenericsUtil;
import java.io.IOException;
-import java.util.*;
+import java.util.HashMap;
+
-/**
- *
- *
- */
public class CBayesThetaMapper extends MapReduceBase implements
Mapper<Text, FloatWritable, Text, FloatWritable> {
public HashMap<String, Float> labelWeightSum = null;
String labelWeightSumString = " ";
- Float sigma_jSigma_k = 0f;
+ Float sigma_jSigma_k = 0.0f;
String sigma_jSigma_kString = " ";
- Float vocabCount = 0f;
+ Float vocabCount = 0.0f;
String vocabCountString = " ";
/**
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/cbayes/CBayesThetaNormalizerDriver.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/cbayes/CBayesThetaNormalizerDriver.java?rev=688364&r1=688363&r2=688364&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/cbayes/CBayesThetaNormalizerDriver.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/cbayes/CBayesThetaNormalizerDriver.java Sat Aug 23 09:48:28 2008
@@ -16,20 +16,20 @@
* limitations under the License.
*/
-import java.util.HashMap;
-
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.DefaultStringifier;
+import org.apache.hadoop.io.FloatWritable;
+import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.SequenceFileInputFormat;
import org.apache.hadoop.mapred.SequenceFileOutputFormat;
import org.apache.hadoop.util.GenericsUtil;
-import org.apache.hadoop.io.DefaultStringifier;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.io.FloatWritable;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.fs.FileSystem;
import org.apache.mahout.classifier.bayes.io.SequenceFileModelReader;
+import java.util.HashMap;
+import java.util.Map;
/**
* Create and run the Bayes Trainer.
@@ -56,7 +56,6 @@
*
* @param input the input pathname String
* @param output the output pathname String
-
*/
public static void runJob(String input, String output) {
JobClient client = new JobClient();
@@ -91,7 +90,7 @@
String labelWeightSumString = mapStringifier.toString(labelWeightSum);
System.out.println("Sigma_k for Each Label");
- HashMap<String,Float> c = mapStringifier.fromString(labelWeightSumString);
+ Map<String,Float> c = mapStringifier.fromString(labelWeightSumString);
System.out.println(c);
conf.set("cnaivebayes.sigma_k", labelWeightSumString);
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/cbayes/CBayesThetaNormalizerMapper.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/cbayes/CBayesThetaNormalizerMapper.java?rev=688364&r1=688363&r2=688364&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/cbayes/CBayesThetaNormalizerMapper.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/cbayes/CBayesThetaNormalizerMapper.java Sat Aug 23 09:48:28 2008
@@ -28,12 +28,8 @@
import org.apache.hadoop.util.GenericsUtil;
import java.io.IOException;
-import java.util.*;
+import java.util.HashMap;
-/**
- *
- *
- */
public class CBayesThetaNormalizerMapper extends MapReduceBase implements
Mapper<Text, FloatWritable, Text, FloatWritable> {
@@ -41,11 +37,11 @@
String labelWeightSumString = " ";
- Float sigma_jSigma_k = 0f;
+ Float sigma_jSigma_k = 0.0f;
String sigma_jSigma_kString = " ";
- Float vocabCount = 0f;
+ Float vocabCount = 0.0f;
String vocabCountString = " ";
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/Canopy.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/Canopy.java?rev=688364&r1=688363&r2=688364&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/Canopy.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/Canopy.java Sat Aug 23 09:48:28 2008
@@ -16,9 +16,6 @@
*/
package org.apache.mahout.clustering.canopy;
-import java.io.IOException;
-import java.util.List;
-
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.OutputCollector;
@@ -27,6 +24,9 @@
import org.apache.mahout.matrix.Vector;
import org.apache.mahout.utils.DistanceMeasure;
+import java.io.IOException;
+import java.util.List;
+
/**
* This class models a canopy as a center point, the number of points that are
* contained within it according to the application of some distance metric, and
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyCombiner.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyCombiner.java?rev=688364&r1=688363&r2=688364&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyCombiner.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyCombiner.java Sat Aug 23 09:48:28 2008
@@ -16,9 +16,6 @@
*/
package org.apache.mahout.clustering.canopy;
-import java.io.IOException;
-import java.util.Iterator;
-
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapred.JobConf;
@@ -29,6 +26,9 @@
import org.apache.mahout.matrix.AbstractVector;
import org.apache.mahout.matrix.Vector;
+import java.io.IOException;
+import java.util.Iterator;
+
public class CanopyCombiner extends MapReduceBase implements
Reducer<Text, Text, Text, Text> {
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyDriver.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyDriver.java?rev=688364&r1=688363&r2=688364&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyDriver.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyDriver.java Sat Aug 23 09:48:28 2008
@@ -19,11 +19,11 @@
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapred.FileInputFormat;
+import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.SequenceFileOutputFormat;
-import org.apache.hadoop.mapred.FileInputFormat;
-import org.apache.hadoop.mapred.FileOutputFormat;
public class CanopyDriver {
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyMapper.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyMapper.java?rev=688364&r1=688363&r2=688364&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyMapper.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyMapper.java Sat Aug 23 09:48:28 2008
@@ -16,10 +16,6 @@
*/
package org.apache.mahout.clustering.canopy;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.List;
-
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapred.JobConf;
@@ -30,6 +26,10 @@
import org.apache.mahout.matrix.AbstractVector;
import org.apache.mahout.matrix.Vector;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
public class CanopyMapper extends MapReduceBase implements
Mapper<WritableComparable, Text, Text, Text> {
@@ -43,9 +43,6 @@
* org.apache.hadoop.mapred.OutputCollector,
* org.apache.hadoop.mapred.Reporter)
*/
- /* (non-Javadoc)
- * @see org.apache.hadoop.mapred.Mapper#map(org.apache.hadoop.io.WritableComparable, org.apache.hadoop.io.Writable, org.apache.hadoop.mapred.OutputCollector, org.apache.hadoop.mapred.Reporter)
- */
public void map(WritableComparable key, Text values,
OutputCollector<Text, Text> output, Reporter reporter) throws IOException {
Vector point = AbstractVector.decodeVector(values.toString());
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyReducer.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyReducer.java?rev=688364&r1=688363&r2=688364&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyReducer.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyReducer.java Sat Aug 23 09:48:28 2008
@@ -16,11 +16,6 @@
*/
package org.apache.mahout.clustering.canopy;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Iterator;
-import java.util.List;
-
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
@@ -30,6 +25,11 @@
import org.apache.mahout.matrix.AbstractVector;
import org.apache.mahout.matrix.Vector;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+
public class CanopyReducer extends MapReduceBase implements
Reducer<Text, Text, Text, Text> {
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/ClusterDriver.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/ClusterDriver.java?rev=688364&r1=688363&r2=688364&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/ClusterDriver.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/ClusterDriver.java Sat Aug 23 09:48:28 2008
@@ -19,10 +19,10 @@
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapred.JobClient;
-import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
+import org.apache.hadoop.mapred.JobClient;
+import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.lib.IdentityReducer;
public class ClusterDriver {
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/ClusterMapper.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/ClusterMapper.java?rev=688364&r1=688363&r2=688364&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/ClusterMapper.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/ClusterMapper.java Sat Aug 23 09:48:28 2008
@@ -16,10 +16,6 @@
*/
package org.apache.mahout.clustering.canopy;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.List;
-
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.SequenceFile;
@@ -33,6 +29,10 @@
import org.apache.mahout.matrix.AbstractVector;
import org.apache.mahout.matrix.Vector;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
public class ClusterMapper extends MapReduceBase implements
Mapper<WritableComparable, Text, Text, Text> {
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/Cluster.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/Cluster.java?rev=688364&r1=688363&r2=688364&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/Cluster.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/Cluster.java Sat Aug 23 09:48:28 2008
@@ -16,9 +16,6 @@
*/
package org.apache.mahout.clustering.kmeans;
-import java.io.IOException;
-import java.util.List;
-
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.OutputCollector;
@@ -27,6 +24,9 @@
import org.apache.mahout.matrix.Vector;
import org.apache.mahout.utils.DistanceMeasure;
+import java.io.IOException;
+import java.util.List;
+
public class Cluster {
public static final String DISTANCE_MEASURE_KEY = "org.apache.mahout.clustering.kmeans.measure";
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansCombiner.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansCombiner.java?rev=688364&r1=688363&r2=688364&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansCombiner.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansCombiner.java Sat Aug 23 09:48:28 2008
@@ -16,9 +16,6 @@
*/
package org.apache.mahout.clustering.kmeans;
-import java.io.IOException;
-import java.util.Iterator;
-
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
@@ -27,6 +24,9 @@
import org.apache.hadoop.mapred.Reporter;
import org.apache.mahout.matrix.AbstractVector;
+import java.io.IOException;
+import java.util.Iterator;
+
public class KMeansCombiner extends MapReduceBase implements
Reducer<Text, Text, Text, Text> {
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansDriver.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansDriver.java?rev=688364&r1=688363&r2=688364&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansDriver.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansDriver.java Sat Aug 23 09:48:28 2008
@@ -20,11 +20,11 @@
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapred.FileInputFormat;
+import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.SequenceFileOutputFormat;
-import org.apache.hadoop.mapred.FileInputFormat;
-import org.apache.hadoop.mapred.FileOutputFormat;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansMapper.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansMapper.java?rev=688364&r1=688363&r2=688364&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansMapper.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansMapper.java Sat Aug 23 09:48:28 2008
@@ -16,10 +16,6 @@
*/
package org.apache.mahout.clustering.kmeans;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.List;
-
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.SequenceFile;
@@ -33,6 +29,10 @@
import org.apache.mahout.matrix.AbstractVector;
import org.apache.mahout.matrix.Vector;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
public class KMeansMapper extends MapReduceBase implements
Mapper<WritableComparable, Text, Text, Text> {
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansReducer.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansReducer.java?rev=688364&r1=688363&r2=688364&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansReducer.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansReducer.java Sat Aug 23 09:48:28 2008
@@ -16,9 +16,6 @@
*/
package org.apache.mahout.clustering.kmeans;
-import java.io.IOException;
-import java.util.Iterator;
-
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
@@ -28,6 +25,9 @@
import org.apache.mahout.matrix.AbstractVector;
import org.apache.mahout.matrix.Vector;
+import java.io.IOException;
+import java.util.Iterator;
+
public class KMeansReducer extends MapReduceBase implements
Reducer<Text, Text, Text, Text> {
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopy.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopy.java?rev=688364&r1=688363&r2=688364&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopy.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopy.java Sat Aug 23 09:48:28 2008
@@ -16,10 +16,6 @@
*/
package org.apache.mahout.clustering.meanshift;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.List;
-
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapred.JobConf;
@@ -31,6 +27,10 @@
import org.apache.mahout.utils.DistanceMeasure;
import org.apache.mahout.utils.EuclideanDistanceMeasure;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
/**
* This class models a canopy as a center point, the number of points that are
* contained within it according to the application of some distance metric, and
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyCombiner.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyCombiner.java?rev=688364&r1=688363&r2=688364&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyCombiner.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyCombiner.java Sat Aug 23 09:48:28 2008
@@ -16,9 +16,6 @@
*/
package org.apache.mahout.clustering.meanshift;
-import java.io.IOException;
-import java.util.Iterator;
-
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
@@ -31,6 +28,9 @@
import org.apache.mahout.matrix.DenseVector;
import org.apache.mahout.matrix.Vector;
+import java.io.IOException;
+import java.util.Iterator;
+
public class MeanShiftCanopyCombiner extends MapReduceBase implements
Reducer<Text, WritableComparable, Text, WritableComparable> {
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyDriver.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyDriver.java?rev=688364&r1=688363&r2=688364&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyDriver.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyDriver.java Sat Aug 23 09:48:28 2008
@@ -18,12 +18,12 @@
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapred.FileInputFormat;
+import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.SequenceFileInputFormat;
import org.apache.hadoop.mapred.SequenceFileOutputFormat;
-import org.apache.hadoop.mapred.FileInputFormat;
-import org.apache.hadoop.mapred.FileOutputFormat;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;