You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by sr...@apache.org on 2010/08/13 20:19:18 UTC
svn commit: r985313 [1/2] - in /mahout/trunk:
examples/src/main/java/org/apache/mahout/cf/taste/example/bookcrossing/
examples/src/main/java/org/apache/mahout/cf/taste/example/grouplens/
examples/src/main/java/org/apache/mahout/cf/taste/example/jester/...
Author: srowen
Date: Fri Aug 13 18:19:16 2010
New Revision: 985313
URL: http://svn.apache.org/viewvc?rev=985313&view=rev
Log:
Another assault on style issues
Modified:
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/bookcrossing/BookCrossingBooleanRecommenderEvaluatorRunner.java
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/bookcrossing/BookCrossingRecommenderEvaluatorRunner.java
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/grouplens/GroupLensRecommenderEvaluatorRunner.java
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/jester/JesterRecommenderEvaluatorRunner.java
mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/WikipediaDatasetCreatorMapper.java
mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/WikipediaDatasetCreatorReducer.java
mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/XmlInputFormat.java
mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayCanopy.java
mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayClustering.java
mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayDirichlet.java
mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayFuzzyKMeans.java
mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayKMeans.java
mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayMeanShift.java
mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/canopy/InputMapper.java
mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/canopy/Job.java
mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/dirichlet/Job.java
mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/fuzzykmeans/Job.java
mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/kmeans/Job.java
mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/InputMapper.java
mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/Job.java
mahout/trunk/examples/src/main/java/org/apache/mahout/df/mapred/BuildForest.java
mahout/trunk/examples/src/main/java/org/apache/mahout/df/mapreduce/TestForest.java
mahout/trunk/examples/src/main/java/org/apache/mahout/fpm/pfpgrowth/dataset/KeyBasedStringTupleMapper.java
mahout/trunk/examples/src/main/java/org/apache/mahout/fpm/pfpgrowth/dataset/KeyBasedStringTupleReducer.java
mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/CDGA.java
mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/CDRule.java
mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/DataLine.java
mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/FileInfoParser.java
mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/hadoop/CDMahoutEvaluator.java
mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/hadoop/CDMapper.java
mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/hadoop/CDReducer.java
mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/hadoop/DatasetSplit.java
mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/tool/CDInfosTool.java
mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/tool/ToolCombiner.java
mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/tool/ToolMapper.java
mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/tool/ToolReducer.java
mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/travellingsalesman/ExecutionPanel.java
mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/travellingsalesman/ItineraryPanel.java
mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/travellingsalesman/StrategyPanel.java
mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/travellingsalesman/TravellingSalesman.java
mahout/trunk/examples/src/main/java/org/apache/mahout/text/WikipediaMapper.java
mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/CDRuleTest.java
mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/FileInfosDatasetTest.java
mahout/trunk/utils/src/main/java/org/apache/mahout/benchmark/VectorBenchmarks.java
mahout/trunk/utils/src/main/java/org/apache/mahout/clustering/cdbw/CDbwDriver.java
mahout/trunk/utils/src/main/java/org/apache/mahout/clustering/cdbw/CDbwEvaluator.java
mahout/trunk/utils/src/main/java/org/apache/mahout/clustering/cdbw/CDbwMapper.java
mahout/trunk/utils/src/main/java/org/apache/mahout/clustering/cdbw/CDbwReducer.java
mahout/trunk/utils/src/main/java/org/apache/mahout/clustering/lda/LDAPrintTopics.java
mahout/trunk/utils/src/main/java/org/apache/mahout/text/DefaultAnalyzer.java
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/clustering/ClusterDumper.java
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/nlp/collocations/llr/CollocCombiner.java
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/nlp/collocations/llr/CollocDriver.java
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/nlp/collocations/llr/CollocMapper.java
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/nlp/collocations/llr/CollocReducer.java
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/nlp/collocations/llr/GramKeyGroupComparator.java
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/nlp/collocations/llr/LLRReducer.java
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/RowIdJob.java
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/VectorHelper.java
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/common/PartialVectorMergeReducer.java
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/common/PartialVectorMerger.java
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/lucene/ClusterLabels.java
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/lucene/LuceneIterable.java
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/text/DocumentProcessor.java
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/text/document/SequenceFileTokenizerMapper.java
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/text/term/TFPartialVectorReducer.java
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/text/term/TermCountMapper.java
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/text/term/TermCountReducer.java
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/text/term/TermDocumentCountMapper.java
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/text/term/TermDocumentCountReducer.java
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/tfidf/TFIDFConverter.java
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/tfidf/TFIDFPartialVectorReducer.java
mahout/trunk/utils/src/test/java/org/apache/mahout/clustering/cdbw/TestCDbwEvaluator.java
mahout/trunk/utils/src/test/java/org/apache/mahout/clustering/dirichlet/TestL1ModelClustering.java
mahout/trunk/utils/src/test/java/org/apache/mahout/utils/nlp/collocations/llr/CollocMapperTest.java
mahout/trunk/utils/src/test/java/org/apache/mahout/utils/nlp/collocations/llr/CollocReducerTest.java
mahout/trunk/utils/src/test/java/org/apache/mahout/utils/nlp/collocations/llr/GramKeyPartitionerTest.java
mahout/trunk/utils/src/test/java/org/apache/mahout/utils/nlp/collocations/llr/GramTest.java
mahout/trunk/utils/src/test/java/org/apache/mahout/utils/nlp/collocations/llr/LLRReducerTest.java
mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/SequenceFileVectorIterableTest.java
mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/arff/ARFFVectorIterableTest.java
mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/io/VectorWriterTest.java
mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/lucene/LuceneIterableTest.java
mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/text/DictionaryVectorizerTest.java
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/bookcrossing/BookCrossingBooleanRecommenderEvaluatorRunner.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/bookcrossing/BookCrossingBooleanRecommenderEvaluatorRunner.java?rev=985313&r1=985312&r2=985313&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/bookcrossing/BookCrossingBooleanRecommenderEvaluatorRunner.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/bookcrossing/BookCrossingBooleanRecommenderEvaluatorRunner.java Fri Aug 13 18:19:16 2010
@@ -40,13 +40,9 @@ public final class BookCrossingBooleanRe
public static void main(String... args) throws IOException, TasteException, OptionException {
RecommenderIRStatsEvaluator evaluator = new GenericRecommenderIRStatsEvaluator();
- DataModel model;
File ratingsFile = TasteOptionParser.getRatings(args);
- if (ratingsFile != null) {
- model = new BookCrossingDataModel(ratingsFile, true);
- } else {
- model = new BookCrossingDataModel(true);
- }
+ DataModel model =
+ ratingsFile == null ? new BookCrossingDataModel(true) : new BookCrossingDataModel(ratingsFile, true);
IRStatistics evaluation = evaluator.evaluate(
new BookCrossingBooleanRecommenderBuilder(),
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/bookcrossing/BookCrossingRecommenderEvaluatorRunner.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/bookcrossing/BookCrossingRecommenderEvaluatorRunner.java?rev=985313&r1=985312&r2=985313&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/bookcrossing/BookCrossingRecommenderEvaluatorRunner.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/bookcrossing/BookCrossingRecommenderEvaluatorRunner.java Fri Aug 13 18:19:16 2010
@@ -39,14 +39,10 @@ public final class BookCrossingRecommend
public static void main(String... args) throws IOException, TasteException, OptionException {
RecommenderEvaluator evaluator = new AverageAbsoluteDifferenceRecommenderEvaluator();
- DataModel model;
File ratingsFile = TasteOptionParser.getRatings(args);
- if (ratingsFile != null) {
- model = new BookCrossingDataModel(ratingsFile, false);
- } else {
- model = new BookCrossingDataModel(false);
- }
-
+ DataModel model =
+ ratingsFile == null ? new BookCrossingDataModel(false) : new BookCrossingDataModel(ratingsFile, false);
+
double evaluation = evaluator.evaluate(new BookCrossingRecommenderBuilder(),
null,
model,
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/grouplens/GroupLensRecommenderEvaluatorRunner.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/grouplens/GroupLensRecommenderEvaluatorRunner.java?rev=985313&r1=985312&r2=985313&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/grouplens/GroupLensRecommenderEvaluatorRunner.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/grouplens/GroupLensRecommenderEvaluatorRunner.java Fri Aug 13 18:19:16 2010
@@ -43,13 +43,8 @@ public final class GroupLensRecommenderE
public static void main(String... args) throws IOException, TasteException, OptionException {
RecommenderEvaluator evaluator = new AverageAbsoluteDifferenceRecommenderEvaluator();
- DataModel model;
File ratingsFile = TasteOptionParser.getRatings(args);
- if (ratingsFile != null) {
- model = new GroupLensDataModel(ratingsFile);
- } else {
- model = new GroupLensDataModel();
- }
+ DataModel model = ratingsFile == null ? new GroupLensDataModel() : new GroupLensDataModel(ratingsFile);
double evaluation = evaluator.evaluate(new GroupLensRecommenderBuilder(),
null,
model,
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/jester/JesterRecommenderEvaluatorRunner.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/jester/JesterRecommenderEvaluatorRunner.java?rev=985313&r1=985312&r2=985313&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/jester/JesterRecommenderEvaluatorRunner.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/jester/JesterRecommenderEvaluatorRunner.java Fri Aug 13 18:19:16 2010
@@ -39,13 +39,8 @@ public final class JesterRecommenderEval
public static void main(String... args) throws IOException, TasteException, OptionException {
RecommenderEvaluator evaluator = new AverageAbsoluteDifferenceRecommenderEvaluator();
- DataModel model;
File ratingsFile = TasteOptionParser.getRatings(args);
- if (ratingsFile != null) {
- model = new JesterDataModel(ratingsFile);
- } else {
- model = new JesterDataModel();
- }
+ DataModel model = ratingsFile == null ? new JesterDataModel() : new JesterDataModel(ratingsFile);
double evaluation = evaluator.evaluate(new JesterRecommenderBuilder(),
null,
model,
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/WikipediaDatasetCreatorMapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/WikipediaDatasetCreatorMapper.java?rev=985313&r1=985312&r2=985313&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/WikipediaDatasetCreatorMapper.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/WikipediaDatasetCreatorMapper.java Fri Aug 13 18:19:16 2010
@@ -58,20 +58,17 @@ public class WikipediaDatasetCreatorMapp
private Analyzer analyzer;
- /* (non-Javadoc)
- * @see org.apache.hadoop.mapreduce.Mapper#map(java.lang.Object, java.lang.Object, org.apache.hadoop.mapreduce.Mapper.Context)
- */
@Override
protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
StringBuilder contents = new StringBuilder();
String document = value.toString();
String catMatch = findMatchingCategory(document);
- if (!catMatch.equals("Unknown")) {
+ if (!"Unknown".equals(catMatch)) {
document = StringEscapeUtils.unescapeHtml(WikipediaDatasetCreatorMapper.CLOSE_TEXT_TAG_PATTERN.matcher(
WikipediaDatasetCreatorMapper.OPEN_TEXT_TAG_PATTERN.matcher(document).replaceFirst("")).replaceAll(""));
TokenStream stream = analyzer.tokenStream(catMatch, new StringReader(document));
- TermAttribute termAtt = (TermAttribute) stream.addAttribute(TermAttribute.class);
+ TermAttribute termAtt = stream.addAttribute(TermAttribute.class);
while (stream.incrementToken()) {
contents.append(termAtt.termBuffer(), 0, termAtt.termLength()).append(' ');
}
@@ -80,10 +77,6 @@ public class WikipediaDatasetCreatorMapp
}
}
- /* (non-Javadoc)
- * @see org.apache.hadoop.mapreduce.Mapper#setup(org.apache.hadoop.mapreduce.Mapper.Context)
- */
- @SuppressWarnings("unchecked")
@Override
protected void setup(Context context) throws IOException, InterruptedException {
super.setup(context);
@@ -103,7 +96,7 @@ public class WikipediaDatasetCreatorMapp
exactMatchOnly = conf.getBoolean("exact.match.only", false);
if (analyzer == null) {
String analyzerStr = conf.get("analyzer.class", WikipediaAnalyzer.class.getName());
- Class<? extends Analyzer> analyzerClass = (Class<? extends Analyzer>) Class.forName(analyzerStr);
+ Class<? extends Analyzer> analyzerClass = Class.forName(analyzerStr).asSubclass(Analyzer.class);
analyzer = analyzerClass.newInstance();
}
} catch (IOException ex) {
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/WikipediaDatasetCreatorReducer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/WikipediaDatasetCreatorReducer.java?rev=985313&r1=985312&r2=985313&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/WikipediaDatasetCreatorReducer.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/WikipediaDatasetCreatorReducer.java Fri Aug 13 18:19:16 2010
@@ -18,7 +18,6 @@
package org.apache.mahout.classifier.bayes;
import java.io.IOException;
-import java.util.Iterator;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
@@ -28,16 +27,12 @@ import org.apache.hadoop.mapreduce.Reduc
*/
public class WikipediaDatasetCreatorReducer extends Reducer<Text, Text, Text, Text> {
- /* (non-Javadoc)
- * @see org.apache.hadoop.mapreduce.Reducer#reduce(java.lang.Object, java.lang.Iterable, org.apache.hadoop.mapreduce.Reducer.Context)
- */
@Override
protected void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
// Key is label,word, value is the number of times we've seen this label
// word per local node. Output is the same
- Iterator<Text> it = values.iterator();
- while (it.hasNext()) {
- context.write(key, it.next());
+ for (Text value : values) {
+ context.write(key, value);
}
}
}
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/XmlInputFormat.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/XmlInputFormat.java?rev=985313&r1=985312&r2=985313&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/XmlInputFormat.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/XmlInputFormat.java Fri Aug 13 18:19:16 2010
@@ -33,7 +33,7 @@ import org.apache.hadoop.mapreduce.lib.i
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
/**
- * Reads records that are delimited by a specifc begin/end tag.
+ * Reads records that are delimited by a specific begin/end tag.
*/
public class XmlInputFormat extends TextInputFormat {
@@ -41,17 +41,12 @@ public class XmlInputFormat extends Text
public static final String END_TAG_KEY = "xmlinput.end";
- /* (non-Javadoc)
- * @see org.apache.hadoop.mapreduce.lib.input.TextInputFormat#createRecordReader(org.apache.hadoop.mapreduce.InputSplit, org.apache.hadoop.mapreduce.TaskAttemptContext)
- */
@Override
public RecordReader<LongWritable, Text> createRecordReader(InputSplit split,
TaskAttemptContext context) {
try {
return new XmlRecordReader((FileSplit) split, context.getConfiguration());
} catch (IOException e) {
- // TODO Auto-generated catch block
- e.printStackTrace();
return null;
}
}
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayCanopy.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayCanopy.java?rev=985313&r1=985312&r2=985313&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayCanopy.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayCanopy.java Fri Aug 13 18:19:16 2010
@@ -19,28 +19,18 @@ package org.apache.mahout.clustering.dis
import java.awt.Graphics;
import java.awt.Graphics2D;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.List;
import org.apache.hadoop.fs.Path;
-import org.apache.mahout.clustering.Cluster;
-import org.apache.mahout.clustering.canopy.Canopy;
-import org.apache.mahout.clustering.canopy.CanopyClusterer;
import org.apache.mahout.clustering.canopy.CanopyDriver;
import org.apache.mahout.common.HadoopUtil;
import org.apache.mahout.common.RandomUtils;
import org.apache.mahout.common.distance.ManhattanDistanceMeasure;
-import org.apache.mahout.math.Vector;
-import org.apache.mahout.math.VectorWritable;
class DisplayCanopy extends DisplayClustering {
- private static final long serialVersionUID = 1L;
-
DisplayCanopy() {
initialize();
- this.setTitle("Canopy Clusters (>" + (int) (SIGNIFICANCE * 100) + "% of population)");
+ this.setTitle("Canopy Clusters (>" + (int) (getSignificance() * 100) + "% of population)");
}
@Override
@@ -49,9 +39,8 @@ class DisplayCanopy extends DisplayClust
plotClusters((Graphics2D) g);
}
- public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException, InstantiationException,
- IllegalAccessException {
- SIGNIFICANCE = 0.05;
+ public static void main(String[] args) throws Exception {
+ //SIGNIFICANCE = 0.05;
Path samples = new Path("samples");
Path output = new Path("output");
HadoopUtil.overwriteOutput(samples);
@@ -59,22 +48,23 @@ class DisplayCanopy extends DisplayClust
RandomUtils.useTestSeed();
generateSamples();
writeSampleData(samples);
- boolean b = true;
- if (b) {
+ //boolean b = true;
+ //if (b) {
new CanopyDriver().buildClusters(samples, output, ManhattanDistanceMeasure.class.getName(), T1, T2, true);
loadClusters(output);
- } else {
- List<Vector> points = new ArrayList<Vector>();
- for (VectorWritable sample : SAMPLE_DATA) {
- points.add(sample.get());
- }
- List<Canopy> canopies = CanopyClusterer.createCanopies(points, new ManhattanDistanceMeasure(), T1, T2);
- CanopyClusterer.updateCentroids(canopies);
- List<Cluster> clusters = new ArrayList<Cluster>();
- for (Canopy canopy : canopies)
- clusters.add(canopy);
- CLUSTERS.add(clusters);
- }
+ //} else {
+ // List<Vector> points = new ArrayList<Vector>();
+ // for (VectorWritable sample : SAMPLE_DATA) {
+ // points.add(sample.get());
+ // }
+ // List<Canopy> canopies = CanopyClusterer.createCanopies(points, new ManhattanDistanceMeasure(), T1, T2);
+ // CanopyClusterer.updateCentroids(canopies);
+ // List<Cluster> clusters = new ArrayList<Cluster>();
+ // for (Canopy canopy : canopies) {
+ // clusters.add(canopy);
+ // }
+ // CLUSTERS.add(clusters);
+ //}
new DisplayCanopy();
}
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayClustering.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayClustering.java?rev=985313&r1=985312&r2=985313&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayClustering.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayClustering.java Fri Aug 13 18:19:16 2010
@@ -64,8 +64,6 @@ public class DisplayClustering extends F
protected static final List<List<Cluster>> CLUSTERS = new ArrayList<List<Cluster>>();
- protected static double SIGNIFICANCE = 0.05;
-
protected static final Color[] COLORS = { Color.red, Color.orange, Color.yellow, Color.green, Color.blue, Color.magenta,
Color.lightGray };
@@ -113,7 +111,7 @@ public class DisplayClustering extends F
plotClusters(g2);
}
- protected void plotClusters(Graphics2D g2) {
+ protected static void plotClusters(Graphics2D g2) {
int cx = CLUSTERS.size() - 1;
for (List<Cluster> clusters : CLUSTERS) {
g2.setStroke(new BasicStroke(cx == 0 ? 3 : 1));
@@ -124,7 +122,7 @@ public class DisplayClustering extends F
}
}
- protected void plotSampleParameters(Graphics2D g2) {
+ protected static void plotSampleParameters(Graphics2D g2) {
Vector v = new DenseVector(2);
Vector dv = new DenseVector(2);
g2.setColor(Color.RED);
@@ -137,7 +135,7 @@ public class DisplayClustering extends F
}
}
- protected void plotSampleData(Graphics2D g2) {
+ protected static void plotSampleData(Graphics2D g2) {
double sx = (double) res / DS;
g2.setTransform(AffineTransform.getScaleInstance(sx, sx));
@@ -250,7 +248,7 @@ public class DisplayClustering extends F
for (FileStatus s : status) {
SequenceFile.Reader reader = new SequenceFile.Reader(fs, s.getPath(), conf);
try {
- Text key = new Text();
+ Writable key = new Text();
Writable value = (Writable) reader.getValueClass().newInstance();
while (reader.next(key, value)) {
Cluster cluster = (Cluster) value;
@@ -267,12 +265,11 @@ public class DisplayClustering extends F
}
protected static void loadClusters(Path output) throws IOException, InstantiationException, IllegalAccessException{
- List<Cluster> clusters = new ArrayList<Cluster>();
Configuration conf = new Configuration();
FileSystem fs = FileSystem.get(output.toUri(), conf);
FileStatus[] status = fs.listStatus(output, new ClustersFilter());
for (FileStatus s : status) {
- clusters = readClusters(s.getPath());
+ List<Cluster> clusters = readClusters(s.getPath());
CLUSTERS.add(clusters);
}
}
@@ -301,8 +298,12 @@ public class DisplayClustering extends F
}
}
- protected static boolean isSignificant(Cluster cluster) {
- return (double) cluster.getNumPoints() / SAMPLE_DATA.size() > SIGNIFICANCE;
+ protected boolean isSignificant(Cluster cluster) {
+ return (double) cluster.getNumPoints() / SAMPLE_DATA.size() > getSignificance();
+ }
+
+ protected double getSignificance() {
+ return 0.05;
}
}
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayDirichlet.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayDirichlet.java?rev=985313&r1=985312&r2=985313&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayDirichlet.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayDirichlet.java Fri Aug 13 18:19:16 2010
@@ -27,8 +27,6 @@ import org.apache.mahout.clustering.diri
import org.apache.mahout.clustering.dirichlet.models.AsymmetricSampledNormalDistribution;
import org.apache.mahout.clustering.dirichlet.models.Model;
import org.apache.mahout.clustering.dirichlet.models.ModelDistribution;
-import org.apache.mahout.clustering.dirichlet.models.NormalModelDistribution;
-import org.apache.mahout.clustering.dirichlet.models.SampledNormalDistribution;
import org.apache.mahout.common.RandomUtils;
import org.apache.mahout.math.DenseVector;
import org.apache.mahout.math.VectorWritable;
@@ -51,7 +49,7 @@ public class DisplayDirichlet extends Di
plotClusters((Graphics2D) g);
}
- protected static void printModels(List<Model<VectorWritable>[]> results, int significant) {
+ protected static void printModels(Iterable<Model<VectorWritable>[]> results, int significant) {
int row = 0;
StringBuilder models = new StringBuilder();
for (Model<VectorWritable>[] r : results) {
@@ -73,17 +71,16 @@ public class DisplayDirichlet extends Di
int numIterations,
double alpha_0, int thin, int burnin) {
DirichletClusterer<VectorWritable> dc = new DirichletClusterer<VectorWritable>(SAMPLE_DATA,
- modelDist,
- alpha_0,
- numClusters,
- thin,
- burnin);
+ modelDist,
+ alpha_0,
+ numClusters,
+ thin,
+ burnin);
List<Model<VectorWritable>[]> result = dc.cluster(numIterations);
printModels(result, burnin);
for (Model<VectorWritable>[] models : result) {
List<Cluster> clusters = new ArrayList<Cluster>();
- for (Model<VectorWritable> model : models) {
- Cluster cluster = (Cluster) model;
+ for (Model<VectorWritable> cluster : models) {
if (isSignificant(cluster)) {
clusters.add(cluster);
}
@@ -94,18 +91,17 @@ public class DisplayDirichlet extends Di
public static void main(String[] args) throws Exception {
VectorWritable modelPrototype = new VectorWritable(new DenseVector(2));
- ModelDistribution<VectorWritable> modelDist;
- modelDist = new NormalModelDistribution(modelPrototype);
- modelDist = new SampledNormalDistribution(modelPrototype);
- modelDist = new AsymmetricSampledNormalDistribution(modelPrototype);
+ // ModelDistribution<VectorWritable> modelDist = new NormalModelDistribution(modelPrototype);
+ // ModelDistribution<VectorWritable> modelDist = new SampledNormalDistribution(modelPrototype);
+ ModelDistribution<VectorWritable> modelDist = new AsymmetricSampledNormalDistribution(modelPrototype);
+
+ RandomUtils.useTestSeed();
+ generateSamples();
int numIterations = 40;
int numClusters = 40;
int alpha_0 = 1;
int thin = 3;
int burnin = 5;
-
- RandomUtils.useTestSeed();
- generateSamples();
generateResults(modelDist, numClusters, numIterations, alpha_0, thin, burnin);
new DisplayDirichlet();
}
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayFuzzyKMeans.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayFuzzyKMeans.java?rev=985313&r1=985312&r2=985313&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayFuzzyKMeans.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayFuzzyKMeans.java Fri Aug 13 18:19:16 2010
@@ -19,27 +19,20 @@ package org.apache.mahout.clustering.dis
import java.awt.Graphics;
import java.awt.Graphics2D;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.List;
import org.apache.hadoop.fs.Path;
-import org.apache.mahout.clustering.fuzzykmeans.FuzzyKMeansClusterer;
import org.apache.mahout.clustering.fuzzykmeans.FuzzyKMeansDriver;
-import org.apache.mahout.clustering.fuzzykmeans.SoftCluster;
import org.apache.mahout.clustering.kmeans.RandomSeedGenerator;
import org.apache.mahout.common.HadoopUtil;
import org.apache.mahout.common.RandomUtils;
import org.apache.mahout.common.distance.DistanceMeasure;
import org.apache.mahout.common.distance.ManhattanDistanceMeasure;
-import org.apache.mahout.math.Vector;
-import org.apache.mahout.math.VectorWritable;
class DisplayFuzzyKMeans extends DisplayClustering {
DisplayFuzzyKMeans() {
initialize();
- this.setTitle("Fuzzy k-Means Clusters (>" + (int) (SIGNIFICANCE * 100) + "% of population)");
+ this.setTitle("Fuzzy k-Means Clusters (>" + (int) (getSignificance() * 100) + "% of population)");
}
// Override the paint() method
@@ -49,13 +42,8 @@ class DisplayFuzzyKMeans extends Display
plotClusters((Graphics2D) g);
}
- public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException, InstantiationException,
- IllegalAccessException {
+ public static void main(String[] args) throws Exception {
DistanceMeasure measure = new ManhattanDistanceMeasure();
- double threshold = 0.001;
- int numClusters = 3;
- int numIterations = 10;
- int m = 3;
Path samples = new Path("samples");
Path output = new Path("output");
@@ -63,8 +51,11 @@ class DisplayFuzzyKMeans extends Display
HadoopUtil.overwriteOutput(output);
RandomUtils.useTestSeed();
DisplayClustering.generateSamples();
- boolean b = true;
- if (b) {
+ //boolean b = true;
+ double threshold = 0.001;
+ int numIterations = 10;
+ int m = 3;
+ //if (b) {
writeSampleData(samples);
Path clusters = RandomSeedGenerator.buildRandom(samples, new Path(output, "clusters-0"), 3);
FuzzyKMeansDriver.runJob(samples,
@@ -78,39 +69,40 @@ class DisplayFuzzyKMeans extends Display
true,
true,
threshold,
- b);
+ true);
loadClusters(output);
- } else {
- List<Vector> points = new ArrayList<Vector>();
- for (VectorWritable sample : SAMPLE_DATA) {
- points.add((Vector) sample.get());
- }
- int id = 0;
- List<SoftCluster> initialClusters = new ArrayList<SoftCluster>();
- for (Vector point : points) {
- if (initialClusters.size() < Math.min(numClusters, points.size())) {
- initialClusters.add(new SoftCluster(point, id++));
- } else {
- break;
- }
- }
- List<List<SoftCluster>> results = FuzzyKMeansClusterer.clusterPoints(points,
- initialClusters,
- measure,
- threshold,
- m,
- numIterations);
- for (List<SoftCluster> models : results) {
- List<org.apache.mahout.clustering.Cluster> clusters = new ArrayList<org.apache.mahout.clustering.Cluster>();
- for (SoftCluster cluster : models) {
- org.apache.mahout.clustering.Cluster cluster2 = (org.apache.mahout.clustering.Cluster) cluster;
- if (isSignificant(cluster2)) {
- clusters.add(cluster2);
- }
- }
- CLUSTERS.add(clusters);
- }
- }
+ //} else {
+ // List<Vector> points = new ArrayList<Vector>();
+ // for (VectorWritable sample : SAMPLE_DATA) {
+ // points.add((Vector) sample.get());
+ // }
+ // int id = 0;
+ // List<SoftCluster> initialClusters = new ArrayList<SoftCluster>();
+ // int numClusters = 3;
+ // for (Vector point : points) {
+ // if (initialClusters.size() < Math.min(numClusters, points.size())) {
+ // initialClusters.add(new SoftCluster(point, id++));
+ // } else {
+ // break;
+ // }
+ // }
+ // List<List<SoftCluster>> results = FuzzyKMeansClusterer.clusterPoints(points,
+ // initialClusters,
+ // measure,
+ // threshold,
+ // m,
+ // numIterations);
+ // for (List<SoftCluster> models : results) {
+ // List<org.apache.mahout.clustering.Cluster> clusters = new ArrayList<org.apache.mahout.clustering.Cluster>();
+ // for (SoftCluster cluster : models) {
+ // org.apache.mahout.clustering.Cluster cluster2 = (org.apache.mahout.clustering.Cluster) cluster;
+ // if (isSignificant(cluster2)) {
+ // clusters.add(cluster2);
+ // }
+ // }
+ // CLUSTERS.add(clusters);
+ // }
+ //}
new DisplayFuzzyKMeans();
}
}
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayKMeans.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayKMeans.java?rev=985313&r1=985312&r2=985313&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayKMeans.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayKMeans.java Fri Aug 13 18:19:16 2010
@@ -19,38 +19,26 @@ package org.apache.mahout.clustering.dis
import java.awt.Graphics;
import java.awt.Graphics2D;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.List;
import org.apache.hadoop.fs.Path;
-import org.apache.mahout.clustering.AbstractCluster;
-import org.apache.mahout.clustering.kmeans.Cluster;
-import org.apache.mahout.clustering.kmeans.KMeansClusterer;
import org.apache.mahout.clustering.kmeans.KMeansDriver;
import org.apache.mahout.clustering.kmeans.RandomSeedGenerator;
import org.apache.mahout.common.HadoopUtil;
import org.apache.mahout.common.RandomUtils;
import org.apache.mahout.common.distance.DistanceMeasure;
import org.apache.mahout.common.distance.ManhattanDistanceMeasure;
-import org.apache.mahout.math.Vector;
-import org.apache.mahout.math.VectorWritable;
class DisplayKMeans extends DisplayClustering {
- static List<List<Cluster>> result;
+ //static List<List<Cluster>> result;
DisplayKMeans() {
initialize();
- this.setTitle("k-Means Clusters (>" + (int) (SIGNIFICANCE * 100) + "% of population)");
+ this.setTitle("k-Means Clusters (>" + (int) (getSignificance() * 100) + "% of population)");
}
- public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException, InstantiationException,
- IllegalAccessException {
+ public static void main(String[] args) throws Exception {
DistanceMeasure measure = new ManhattanDistanceMeasure();
- int numClusters = 3;
- int maxIter = 10;
- double distanceThreshold = 0.001;
Path samples = new Path("samples");
Path output = new Path("output");
HadoopUtil.overwriteOutput(samples);
@@ -59,38 +47,41 @@ class DisplayKMeans extends DisplayClust
RandomUtils.useTestSeed();
DisplayClustering.generateSamples();
writeSampleData(samples);
- boolean b = true;
- if (b) {
+ //boolean b = true;
+ int maxIter = 10;
+ double distanceThreshold = 0.001;
+ //if (b) {
Path clusters = RandomSeedGenerator.buildRandom(samples, new Path(output, "clusters-0"), 3);
KMeansDriver.runJob(samples, clusters, output, measure.getClass().getName(), distanceThreshold, maxIter, 1, true, true);
loadClusters(output);
- } else {
- List<Vector> points = new ArrayList<Vector>();
- for (VectorWritable sample : SAMPLE_DATA) {
- points.add(sample.get());
- }
- List<Cluster> initialClusters = new ArrayList<Cluster>();
- int id = 0;
- for (Vector point : points) {
- if (initialClusters.size() < Math.min(numClusters, points.size())) {
- initialClusters.add(new Cluster(point, id++));
- } else {
- break;
- }
- }
-
- result = KMeansClusterer.clusterPoints(points, initialClusters, measure, maxIter, distanceThreshold);
- for (List<Cluster> models : result) {
- List<org.apache.mahout.clustering.Cluster> clusters = new ArrayList<org.apache.mahout.clustering.Cluster>();
- for (AbstractCluster cluster : models) {
- org.apache.mahout.clustering.Cluster cluster2 = (org.apache.mahout.clustering.Cluster) cluster;
- if (isSignificant(cluster2)) {
- clusters.add(cluster2);
- }
- }
- CLUSTERS.add(clusters);
- }
- }
+ //} else {
+ // List<Vector> points = new ArrayList<Vector>();
+ // for (VectorWritable sample : SAMPLE_DATA) {
+ // points.add(sample.get());
+ // }
+ // List<Cluster> initialClusters = new ArrayList<Cluster>();
+ // int id = 0;
+ // int numClusters = 3;
+ // for (Vector point : points) {
+ // if (initialClusters.size() < Math.min(numClusters, points.size())) {
+ // initialClusters.add(new Cluster(point, id++));
+ // } else {
+ // break;
+ // }
+ // }
+ //
+ // result = KMeansClusterer.clusterPoints(points, initialClusters, measure, maxIter, distanceThreshold);
+ // for (List<Cluster> models : result) {
+ // List<org.apache.mahout.clustering.Cluster> clusters = new ArrayList<org.apache.mahout.clustering.Cluster>();
+ // for (AbstractCluster cluster : models) {
+ // org.apache.mahout.clustering.Cluster cluster2 = (org.apache.mahout.clustering.Cluster) cluster;
+ // if (isSignificant(cluster2)) {
+ // clusters.add(cluster2);
+ // }
+ // }
+ // CLUSTERS.add(clusters);
+ // }
+ //}
new DisplayKMeans();
}
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayMeanShift.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayMeanShift.java?rev=985313&r1=985312&r2=985313&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayMeanShift.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayMeanShift.java Fri Aug 13 18:19:16 2010
@@ -21,7 +21,6 @@ import java.awt.Color;
import java.awt.Graphics;
import java.awt.Graphics2D;
import java.awt.geom.AffineTransform;
-import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
@@ -32,6 +31,7 @@ import org.apache.mahout.clustering.mean
import org.apache.mahout.clustering.meanshift.MeanShiftCanopyDriver;
import org.apache.mahout.common.HadoopUtil;
import org.apache.mahout.common.RandomUtils;
+import org.apache.mahout.common.distance.DistanceMeasure;
import org.apache.mahout.common.distance.EuclideanDistanceMeasure;
import org.apache.mahout.math.DenseVector;
import org.apache.mahout.math.Vector;
@@ -49,7 +49,7 @@ final class DisplayMeanShift extends Dis
private DisplayMeanShift() {
initialize();
- this.setTitle("k-Means Clusters (>" + (int) (SIGNIFICANCE * 100) + "% of population)");
+ this.setTitle("k-Means Clusters (>" + (int) (getSignificance() * 100) + "% of population)");
}
@Override
@@ -75,7 +75,7 @@ final class DisplayMeanShift extends Dis
int i = 0;
for (Cluster cluster : CLUSTERS.get(CLUSTERS.size()-1)) {
MeanShiftCanopy canopy = (MeanShiftCanopy) cluster;
- if (canopy.getBoundPoints().toList().size() >= SIGNIFICANCE * DisplayClustering.SAMPLE_DATA.size()) {
+ if (canopy.getBoundPoints().toList().size() >= getSignificance() * DisplayClustering.SAMPLE_DATA.size()) {
g2.setColor(COLORS[Math.min(i++, DisplayClustering.COLORS.length - 1)]);
int count = 0;
Vector center = new DenseVector(2);
@@ -92,12 +92,15 @@ final class DisplayMeanShift extends Dis
}
}
- public static void main(String[] args) throws IOException, InstantiationException, IllegalAccessException, InterruptedException,
- ClassNotFoundException {
+ @Override
+ protected double getSignificance() {
+ return 0.02;
+ }
+
+ public static void main(String[] args) throws Exception {
t1 = 1.5;
t2 = 0.5;
- SIGNIFICANCE = 0.02;
- EuclideanDistanceMeasure measure = new EuclideanDistanceMeasure();
+ DistanceMeasure measure = new EuclideanDistanceMeasure();
Path samples = new Path("samples");
Path output = new Path("output");
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/canopy/InputMapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/canopy/InputMapper.java?rev=985313&r1=985312&r2=985313&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/canopy/InputMapper.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/canopy/InputMapper.java Fri Aug 13 18:19:16 2010
@@ -21,7 +21,7 @@ import java.io.IOException;
import java.lang.reflect.Constructor;
import java.lang.reflect.InvocationTargetException;
import java.util.ArrayList;
-import java.util.List;
+import java.util.Collection;
import java.util.regex.Pattern;
import org.apache.hadoop.conf.Configuration;
@@ -37,15 +37,12 @@ public class InputMapper extends Mapper<
private Constructor<?> constructor;
- /* (non-Javadoc)
- * @see org.apache.hadoop.mapreduce.Mapper#map(java.lang.Object, java.lang.Object, org.apache.hadoop.mapreduce.Mapper.Context)
- */
@Override
protected void map(LongWritable key, Text values, Context context) throws IOException, InterruptedException {
String[] numbers = InputMapper.SPACE.split(values.toString());
// sometimes there are multiple separator spaces
- List<Double> doubles = new ArrayList<Double>();
+ Collection<Double> doubles = new ArrayList<Double>();
for (String value : numbers) {
if (value.length() > 0) {
doubles.add(Double.valueOf(value));
@@ -69,17 +66,13 @@ public class InputMapper extends Mapper<
}
}
- /* (non-Javadoc)
- * @see org.apache.hadoop.mapreduce.Mapper#setup(org.apache.hadoop.mapreduce.Mapper.Context)
- */
- @SuppressWarnings("unchecked")
@Override
protected void setup(Context context) throws IOException, InterruptedException {
super.setup(context);
Configuration conf = context.getConfiguration();
String vectorImplClassName = conf.get("vector.implementation.class.name");
try {
- Class<? extends Vector> outputClass = (Class<? extends Vector>) conf.getClassByName(vectorImplClassName);
+ Class<? extends Vector> outputClass = conf.getClassByName(vectorImplClassName).asSubclass(Vector.class);
constructor = outputClass.getConstructor(int.class);
} catch (NoSuchMethodException e) {
throw new IllegalStateException(e);
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/canopy/Job.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/canopy/Job.java?rev=985313&r1=985312&r2=985313&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/canopy/Job.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/canopy/Job.java Fri Aug 13 18:19:16 2010
@@ -32,7 +32,6 @@ import org.slf4j.LoggerFactory;
public final class Job extends CanopyDriver {
private Job() {
- super();
}
private static final Logger log = LoggerFactory.getLogger(Job.class);
@@ -45,7 +44,7 @@ public final class Job extends CanopyDri
log.info("Running with default arguments");
Path output = new Path("output");
HadoopUtil.overwriteOutput(output);
- new Job().job(new Path("testdata"), output, "org.apache.mahout.common.distance.EuclideanDistanceMeasure", 80, 55);
+ job(new Path("testdata"), output, "org.apache.mahout.common.distance.EuclideanDistanceMeasure", 80, 55);
}
}
@@ -67,10 +66,9 @@ public final class Job extends CanopyDri
* the canopy T1 threshold
* @param t2
* the canopy T2 threshold
- * @throws ClassNotFoundException
- * @throws InterruptedException
*/
- private void job(Path input, Path output, String measureClassName, double t1, double t2) throws IOException,
+ private static void job(Path input, Path output, String measureClassName, double t1, double t2)
+ throws IOException,
InstantiationException, IllegalAccessException, InterruptedException, ClassNotFoundException {
Path directoryContainingConvertedInput = new Path(output, Constants.DIRECTORY_CONTAINING_CONVERTED_INPUT);
InputDriver.runJob(input, directoryContainingConvertedInput, "org.apache.mahout.math.RandomAccessSparseVector");
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/dirichlet/Job.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/dirichlet/Job.java?rev=985313&r1=985312&r2=985313&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/dirichlet/Job.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/dirichlet/Job.java Fri Aug 13 18:19:16 2010
@@ -20,6 +20,7 @@ package org.apache.mahout.clustering.syn
import java.io.IOException;
import java.lang.reflect.InvocationTargetException;
import java.util.ArrayList;
+import java.util.Collection;
import java.util.List;
import java.util.Map;
@@ -44,12 +45,11 @@ import org.slf4j.LoggerFactory;
public final class Job extends DirichletDriver {
+ private static final Logger log = LoggerFactory.getLogger(Job.class);
+
private Job() {
- super();
}
- private static final Logger log = LoggerFactory.getLogger(Job.class);
-
public static void main(String[] args) throws Exception {
if (args.length > 0) {
log.info("Running with only user-supplied arguments");
@@ -71,10 +71,9 @@ public final class Job extends Dirichlet
}
}
- /* (non-Javadoc)
- * @see org.apache.hadoop.util.Tool#run(java.lang.String[])
- */
- public int run(String[] args) throws IOException, ClassNotFoundException, InstantiationException, IllegalAccessException,
+ @Override
+ public int run(String[] args)
+ throws IOException, ClassNotFoundException, InstantiationException, IllegalAccessException,
NoSuchMethodException, InvocationTargetException, InterruptedException {
addInputOption();
addOutputOption();
@@ -136,10 +135,6 @@ public final class Job extends Dirichlet
* the alpha0 value for the DirichletDistribution
* @param numReducers
* the desired number of reducers
- * @param emitMostLikely
- * @param threshold
- * @throws InterruptedException
- * @throws SecurityException
*/
private void job(Path input,
Path output,
@@ -150,8 +145,9 @@ public final class Job extends Dirichlet
double alpha0,
int numReducers,
boolean emitMostLikely,
- double threshold) throws IOException, ClassNotFoundException, InstantiationException, IllegalAccessException,
- NoSuchMethodException, InvocationTargetException, SecurityException, InterruptedException {
+ double threshold)
+ throws IOException, ClassNotFoundException, InstantiationException, IllegalAccessException,
+ NoSuchMethodException, InvocationTargetException, SecurityException, InterruptedException {
Path directoryContainingConvertedInput = new Path(output, Constants.DIRECTORY_CONTAINING_CONVERTED_INPUT);
InputDriver.runJob(input, directoryContainingConvertedInput, modelPrototype);
DirichletDriver.runJob(directoryContainingConvertedInput,
@@ -166,8 +162,8 @@ public final class Job extends Dirichlet
emitMostLikely,
threshold, false);
// run ClusterDumper
- ClusterDumper clusterDumper = new ClusterDumper(new Path(output, "clusters-" + maxIterations), new Path(output,
- "clusteredPoints"));
+ ClusterDumper clusterDumper = new ClusterDumper(new Path(output, "clusters-" + maxIterations),
+ new Path(output, "clusteredPoints"));
clusterDumper.printClusters(null);
}
@@ -188,9 +184,6 @@ public final class Job extends Dirichlet
* the int number of models
* @param alpha0
* the double alpha_0 value
- * @throws InvocationTargetException
- * @throws NoSuchMethodException
- * @throws SecurityException
*/
public static void printResults(String output,
String modelDistribution,
@@ -199,15 +192,15 @@ public final class Job extends Dirichlet
int numIterations,
int numModels,
double alpha0) throws NoSuchMethodException, InvocationTargetException {
- List<List<DirichletCluster<VectorWritable>>> clusters = new ArrayList<List<DirichletCluster<VectorWritable>>>();
+ Collection<List<DirichletCluster<VectorWritable>>> clusters = new ArrayList<List<DirichletCluster<VectorWritable>>>();
Configuration conf = new Configuration();
- conf.set(DirichletDriver.MODEL_FACTORY_KEY, modelDistribution);
- conf.set(DirichletDriver.NUM_CLUSTERS_KEY, Integer.toString(numModels));
- conf.set(DirichletDriver.ALPHA_0_KEY, Double.toString(alpha0));
+ conf.set(MODEL_FACTORY_KEY, modelDistribution);
+ conf.set(NUM_CLUSTERS_KEY, Integer.toString(numModels));
+ conf.set(ALPHA_0_KEY, Double.toString(alpha0));
for (int i = 0; i < numIterations; i++) {
- conf.set(DirichletDriver.STATE_IN_KEY, output + "/clusters-" + i);
- conf.set(DirichletDriver.MODEL_PROTOTYPE_KEY, vectorClassName);
- conf.set(DirichletDriver.PROTOTYPE_SIZE_KEY, Integer.toString(prototypeSize));
+ conf.set(STATE_IN_KEY, output + "/clusters-" + i);
+ conf.set(MODEL_PROTOTYPE_KEY, vectorClassName);
+ conf.set(PROTOTYPE_SIZE_KEY, Integer.toString(prototypeSize));
clusters.add(DirichletMapper.getDirichletState(conf).getClusters());
}
printClusters(clusters, 0);
@@ -222,7 +215,7 @@ public final class Job extends Dirichlet
* @param significant
* the minimum number of samples to enable printing a model
*/
- private static void printClusters(List<List<DirichletCluster<VectorWritable>>> clusters, int significant) {
+ private static void printClusters(Iterable<List<DirichletCluster<VectorWritable>>> clusters, int significant) {
int row = 0;
StringBuilder result = new StringBuilder();
for (List<DirichletCluster<VectorWritable>> r : clusters) {
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/fuzzykmeans/Job.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/fuzzykmeans/Job.java?rev=985313&r1=985312&r2=985313&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/fuzzykmeans/Job.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/fuzzykmeans/Job.java Fri Aug 13 18:19:16 2010
@@ -41,7 +41,6 @@ public final class Job extends FuzzyKMea
private static final Logger log = LoggerFactory.getLogger(Job.class);
private Job() {
- super();
}
public static void main(String[] args) throws Exception {
@@ -147,12 +146,6 @@ public final class Job extends FuzzyKMea
* the double convergence criteria for iterations
* @param runClustering
* the int maximum number of iterations
- * @param output2
- * the String class name of the DistanceMeasure to use
- * @throws IllegalAccessException
- * @throws InstantiationException
- * @throws ClassNotFoundException
- * @throws InterruptedException
*/
private void job(Path input,
Path output,
@@ -163,8 +156,8 @@ public final class Job extends FuzzyKMea
int numReducerTasks,
float fuzziness,
double convergenceDelta,
- boolean runClustering) throws IOException, InstantiationException, IllegalAccessException, InterruptedException,
- ClassNotFoundException {
+ boolean runClustering)
+ throws IOException, InstantiationException, IllegalAccessException, InterruptedException, ClassNotFoundException {
Path directoryContainingConvertedInput = new Path(output, Constants.DIRECTORY_CONTAINING_CONVERTED_INPUT);
log.info("Preparing Input");
@@ -185,7 +178,8 @@ public final class Job extends FuzzyKMea
0.0,
false);
// run ClusterDumper
- ClusterDumper clusterDumper = new ClusterDumper(new Path(output, "clusters-3"), new Path(output, "clusteredPoints"));
+ ClusterDumper clusterDumper = new ClusterDumper(new Path(output, "clusters-3"),
+ new Path(output, "clusteredPoints"));
clusterDumper.printClusters(null);
}
}
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/kmeans/Job.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/kmeans/Job.java?rev=985313&r1=985312&r2=985313&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/kmeans/Job.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/kmeans/Job.java Fri Aug 13 18:19:16 2010
@@ -39,7 +39,6 @@ public final class Job extends KMeansDri
private static final Logger log = LoggerFactory.getLogger(Job.class);
private Job() {
- super();
}
public static void main(String[] args) throws Exception {
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/InputMapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/InputMapper.java?rev=985313&r1=985312&r2=985313&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/InputMapper.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/InputMapper.java Fri Aug 13 18:19:16 2010
@@ -19,7 +19,7 @@ package org.apache.mahout.clustering.syn
import java.io.IOException;
import java.util.ArrayList;
-import java.util.List;
+import java.util.Collection;
import java.util.regex.Pattern;
import org.apache.hadoop.io.LongWritable;
@@ -33,16 +33,12 @@ public class InputMapper extends Mapper<
private static final Pattern SPACE = Pattern.compile(" ");
private int nextCanopyId;
-
-
- /* (non-Javadoc)
- * @see org.apache.hadoop.mapreduce.Mapper#map(java.lang.Object, java.lang.Object, org.apache.hadoop.mapreduce.Mapper.Context)
- */
+
@Override
protected void map(LongWritable key, Text values, Context context) throws IOException, InterruptedException {
String[] numbers = InputMapper.SPACE.split(values.toString());
// sometimes there are multiple separator spaces
- List<Double> doubles = new ArrayList<Double>();
+ Collection<Double> doubles = new ArrayList<Double>();
for (String value : numbers) {
if (value.length() > 0) {
doubles.add(Double.valueOf(value));
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/Job.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/Job.java?rev=985313&r1=985312&r2=985313&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/Job.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/Job.java Fri Aug 13 18:19:16 2010
@@ -36,7 +36,6 @@ public final class Job extends MeanShift
private static final Logger log = LoggerFactory.getLogger(Job.class);
private Job() {
- super();
}
public static void main(String[] args) throws Exception {
@@ -51,9 +50,6 @@ public final class Job extends MeanShift
}
}
- /* (non-Javadoc)
- * @see org.apache.hadoop.util.Tool#run(java.lang.String[])
- */
@Override
public int run(String[] args) throws Exception {
addInputOption();
@@ -113,12 +109,6 @@ public final class Job extends MeanShift
* the double convergence criteria for iterations
* @param maxIterations
* the int maximum number of iterations
- * @throws ClassNotFoundException
- * @throws InterruptedException
- * @throws IllegalAccessException
- * @throws InstantiationException
- * @throws IllegalAccessException
- * @throws InstantiationException
*/
private void job(Path input,
Path output,
@@ -126,7 +116,8 @@ public final class Job extends MeanShift
double t1,
double t2,
double convergenceDelta,
- int maxIterations) throws IOException, InterruptedException, ClassNotFoundException, InstantiationException, IllegalAccessException {
+ int maxIterations)
+ throws IOException, InterruptedException, ClassNotFoundException, InstantiationException, IllegalAccessException {
Path directoryContainingConvertedInput = new Path(output, Constants.DIRECTORY_CONTAINING_CONVERTED_INPUT);
InputDriver.runJob(input, directoryContainingConvertedInput);
MeanShiftCanopyDriver.runJob(directoryContainingConvertedInput,
@@ -139,8 +130,8 @@ public final class Job extends MeanShift
true,
true, false);
// run ClusterDumper
- ClusterDumper clusterDumper = new ClusterDumper(new Path(output, "clusters-" + maxIterations), new Path(output,
- "clusteredPoints"));
+ ClusterDumper clusterDumper = new ClusterDumper(new Path(output, "clusters-" + maxIterations),
+ new Path(output, "clusteredPoints"));
clusterDumper.printClusters(null);
}
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/df/mapred/BuildForest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/df/mapred/BuildForest.java?rev=985313&r1=985312&r2=985313&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/df/mapred/BuildForest.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/df/mapred/BuildForest.java Fri Aug 13 18:19:16 2010
@@ -182,18 +182,13 @@ public class BuildForest extends Configu
log.info("Build Time: {}", DFUtils.elapsedTime(time));
if (isOob) {
- Random rng;
- if (seed != null) {
- rng = RandomUtils.getRandom(seed);
- } else {
- rng = RandomUtils.getRandom();
- }
-
+ Random rng = seed == null ? RandomUtils.getRandom() : RandomUtils.getRandom(seed);
+
FileSystem fs = dataPath.getFileSystem(getConf());
int[] labels = Data.extractLabels(dataset, fs, dataPath);
- log.info("oob error estimate : "
- + ErrorEstimate.errorRate(labels, callback.computePredictions(rng)));
+ log.info("oob error estimate : {}",
+ ErrorEstimate.errorRate(labels, callback.computePredictions(rng)));
}
return forest;
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/df/mapreduce/TestForest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/df/mapreduce/TestForest.java?rev=985313&r1=985312&r2=985313&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/df/mapreduce/TestForest.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/df/mapreduce/TestForest.java Fri Aug 13 18:19:16 2010
@@ -134,7 +134,7 @@ public class TestForest extends Configur
outputPath = new Path(outputName);
}
} catch (OptionException e) {
- System.out.println("Exception : " + e);
+ log.warn(e.toString(), e);
CommandLineUtil.printHelp(group);
return -1;
}
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/fpm/pfpgrowth/dataset/KeyBasedStringTupleMapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/fpm/pfpgrowth/dataset/KeyBasedStringTupleMapper.java?rev=985313&r1=985312&r2=985313&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/fpm/pfpgrowth/dataset/KeyBasedStringTupleMapper.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/fpm/pfpgrowth/dataset/KeyBasedStringTupleMapper.java Fri Aug 13 18:19:16 2010
@@ -19,6 +19,7 @@ package org.apache.mahout.fpm.pfpgrowth.
import java.io.IOException;
import java.util.ArrayList;
+import java.util.Collection;
import java.util.List;
import java.util.regex.Pattern;
@@ -52,7 +53,7 @@ public class KeyBasedStringTupleMapper e
context.getCounter("Map", "ERROR").increment(1);
return;
}
- List<String> oKey = new ArrayList<String>();
+ Collection<String> oKey = new ArrayList<String>();
for (int groupingField : groupingFields) {
oKey.add(fields[groupingField]);
context.setStatus(fields[groupingField]);
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/fpm/pfpgrowth/dataset/KeyBasedStringTupleReducer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/fpm/pfpgrowth/dataset/KeyBasedStringTupleReducer.java?rev=985313&r1=985312&r2=985313&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/fpm/pfpgrowth/dataset/KeyBasedStringTupleReducer.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/fpm/pfpgrowth/dataset/KeyBasedStringTupleReducer.java Fri Aug 13 18:19:16 2010
@@ -18,8 +18,8 @@
package org.apache.mahout.fpm.pfpgrowth.dataset;
import java.io.IOException;
+import java.util.Collection;
import java.util.HashSet;
-import java.util.Set;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
@@ -33,7 +33,7 @@ public class KeyBasedStringTupleReducer
@Override
protected void reduce(Text key, Iterable<StringTuple> values, Context context) throws IOException,
InterruptedException {
- Set<String> items = new HashSet<String>();
+ Collection<String> items = new HashSet<String>();
for (StringTuple value : values) {
for (String field : value.getEntries()) {
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/CDGA.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/CDGA.java?rev=985313&r1=985312&r2=985313&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/CDGA.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/CDGA.java Fri Aug 13 18:19:16 2010
@@ -152,8 +152,16 @@ public final class CDGA {
}
}
- private static void runJob(String dataset, int target, double threshold, int crosspnts, double mutrate, double mutrange,
- int mutprec, int popSize, int genCount) throws IOException, InterruptedException, ClassNotFoundException {
+ private static void runJob(String dataset,
+ int target,
+ double threshold,
+ int crosspnts,
+ double mutrate,
+ double mutrange,
+ int mutprec,
+ int popSize,
+ int genCount)
+ throws IOException, InterruptedException, ClassNotFoundException {
Path inpath = new Path(dataset);
CDMahoutEvaluator.initializeDataSet(inpath);
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/CDRule.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/CDRule.java?rev=985313&r1=985312&r2=985313&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/CDRule.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/CDRule.java Fri Aug 13 18:19:16 2010
@@ -161,32 +161,22 @@ public class CDRule implements Rule {
if (getW(condInd) < threshold) {
return true; // no
}
-
- if (DataSet.getDataSet().isNumerical(attrInd)) {
- return numericalCondition(condInd, dl);
- } else {
- return categoricalCondition(condInd, dl);
- }
+
+ return DataSet.getDataSet().isNumerical(attrInd)
+ ? numericalCondition(condInd, dl)
+ : categoricalCondition(condInd, dl);
}
boolean numericalCondition(int condInd, DataLine dl) {
int attrInd = attributeIndex(condInd);
-
- if (getO(condInd)) {
- return dl.getAttribut(attrInd) >= getV(condInd);
- } else {
- return dl.getAttribut(attrInd) < getV(condInd);
- }
+
+ return getO(condInd) ? dl.getAttribute(attrInd) >= getV(condInd) : dl.getAttribute(attrInd) < getV(condInd);
}
boolean categoricalCondition(int condInd, DataLine dl) {
int attrInd = attributeIndex(condInd);
-
- if (getO(condInd)) {
- return dl.getAttribut(attrInd) == getV(condInd);
- } else {
- return dl.getAttribut(attrInd) != getV(condInd);
- }
+
+ return getO(condInd) ? dl.getAttribute(attrInd) == getV(condInd) : dl.getAttribute(attrInd) != getV(condInd);
}
@Override
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/DataLine.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/DataLine.java?rev=985313&r1=985312&r2=985313&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/DataLine.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/DataLine.java Fri Aug 13 18:19:16 2010
@@ -43,7 +43,7 @@ public class DataLine {
return (int) attributes[labelPos];
}
- public double getAttribut(int index) {
+ public double getAttribute(int index) {
return attributes[index];
}
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/FileInfoParser.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/FileInfoParser.java?rev=985313&r1=985312&r2=985313&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/FileInfoParser.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/FileInfoParser.java Fri Aug 13 18:19:16 2010
@@ -19,6 +19,7 @@ package org.apache.mahout.ga.watchmaker.
import java.io.IOException;
import java.util.ArrayList;
+import java.util.Collection;
import java.util.List;
import java.util.NoSuchElementException;
import java.util.Scanner;
@@ -138,7 +139,7 @@ public final class FileInfoParser {
* @param tokenizer
*/
private static NominalAttr parseNominal(StringTokenizer tokenizer) {
- List<String> vlist = new ArrayList<String>();
+ Collection<String> vlist = new ArrayList<String>();
while (tokenizer.hasMoreTokens()) {
vlist.add(nextToken(tokenizer));
}
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/hadoop/CDMahoutEvaluator.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/hadoop/CDMahoutEvaluator.java?rev=985313&r1=985312&r2=985313&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/hadoop/CDMahoutEvaluator.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/hadoop/CDMahoutEvaluator.java Fri Aug 13 18:19:16 2010
@@ -20,6 +20,7 @@ package org.apache.mahout.ga.watchmaker.
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
+import java.util.Collection;
import java.util.List;
import org.apache.hadoop.conf.Configuration;
@@ -28,6 +29,7 @@ import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.SequenceFile.Reader;
import org.apache.hadoop.io.SequenceFile.Sorter;
+import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
@@ -182,7 +184,10 @@ public final class CDMahoutEvaluator {
* sorted in the same order as the candidates.
* @throws IOException
*/
- private static void importEvaluations(FileSystem fs, Configuration conf, Path outpath, List<CDFitness> evaluations) throws IOException {
+ private static void importEvaluations(FileSystem fs,
+ Configuration conf, Path outpath,
+ Collection<CDFitness> evaluations)
+ throws IOException {
Sorter sorter = new Sorter(fs, LongWritable.class, CDFitness.class, conf);
// merge and sort the outputs
@@ -191,7 +196,7 @@ public final class CDMahoutEvaluator {
sorter.merge(outfiles, output);
// import the evaluations
- LongWritable key = new LongWritable();
+ Writable key = new LongWritable();
CDFitness value = new CDFitness();
Reader reader = new Reader(fs, output, conf);
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/hadoop/CDMapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/hadoop/CDMapper.java?rev=985313&r1=985312&r2=985313&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/hadoop/CDMapper.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/hadoop/CDMapper.java Fri Aug 13 18:19:16 2010
@@ -45,9 +45,6 @@ public class CDMapper extends Mapper<Lon
int target;
- /* (non-Javadoc)
- * @see org.apache.hadoop.mapreduce.Mapper#map(java.lang.Object, java.lang.Object, org.apache.hadoop.mapreduce.Mapper.Context)
- */
@Override
protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
DataLine dl = new DataLine(value.toString());
@@ -58,9 +55,6 @@ public class CDMapper extends Mapper<Lon
}
}
- /* (non-Javadoc)
- * @see org.apache.hadoop.mapreduce.Mapper#setup(org.apache.hadoop.mapreduce.Mapper.Context)
- */
@Override
protected void setup(Context context) throws IOException, InterruptedException {
super.setup(context);
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/hadoop/CDReducer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/hadoop/CDReducer.java?rev=985313&r1=985312&r2=985313&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/hadoop/CDReducer.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/hadoop/CDReducer.java Fri Aug 13 18:19:16 2010
@@ -18,7 +18,6 @@
package org.apache.mahout.ga.watchmaker.cd.hadoop;
import java.io.IOException;
-import java.util.Iterator;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.mapreduce.Reducer;
@@ -29,18 +28,14 @@ import org.apache.mahout.ga.watchmaker.c
*/
public class CDReducer extends Reducer<LongWritable, CDFitness, LongWritable, CDFitness> {
- /* (non-Javadoc)
- * @see org.apache.hadoop.mapreduce.Reducer#reduce(java.lang.Object, java.lang.Iterable, org.apache.hadoop.mapreduce.Reducer.Context)
- */
@Override
- protected void reduce(LongWritable key, Iterable<CDFitness> values, Context context) throws IOException, InterruptedException {
+ protected void reduce(LongWritable key, Iterable<CDFitness> values, Context context)
+ throws IOException, InterruptedException {
int tp = 0;
int fp = 0;
int tn = 0;
int fn = 0;
- Iterator<CDFitness> it = values.iterator();
- while (it.hasNext()) {
- CDFitness v = it.next();
+ for (CDFitness v : values) {
tp += v.getTp();
fp += v.getFp();
tn += v.getTn();
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/hadoop/DatasetSplit.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/hadoop/DatasetSplit.java?rev=985313&r1=985312&r2=985313&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/hadoop/DatasetSplit.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/hadoop/DatasetSplit.java Fri Aug 13 18:19:16 2010
@@ -23,11 +23,9 @@ import java.util.Random;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
-import org.apache.hadoop.mapreduce.lib.input.LineRecordReader;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.mahout.common.RandomUtils;
import org.apache.mahout.common.RandomWrapper;
@@ -162,8 +160,6 @@ public class DatasetSplit {
try {
return reader.getProgress();
} catch (InterruptedException e) {
- // TODO Auto-generated catch block
- e.printStackTrace();
return 0;
}
}
@@ -210,18 +206,13 @@ public class DatasetSplit {
}
/**
- * {@link org.apache.hadoop.mapred.TextInputFormat TextInputFormat that uses a {@link RndLineRecordReader
- * RndLineRecordReader} as a RecordReader
+ * {@link TextInputFormat} that uses a {@link RndLineRecordReader} as a RecordReader
*/
public static class DatasetTextInputFormat extends TextInputFormat {
-
- public RecordReader<LongWritable, Text> getRecordReader(InputSplit split, TaskAttemptContext context, Reporter reporter)
- throws IOException {
- reporter.setStatus(split.toString());
-
- LineRecordReader lineRecordReader = new LineRecordReader();
- lineRecordReader.initialize(split, context);
- return new RndLineRecordReader((RecordReader<LongWritable, Text>) lineRecordReader, context.getConfiguration());
+ @Override
+ public RecordReader<LongWritable, Text> createRecordReader(InputSplit split,
+ TaskAttemptContext context) {
+ return new RndLineRecordReader(super.createRecordReader(split, context), context.getConfiguration());
}
}
}
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/tool/CDInfosTool.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/tool/CDInfosTool.java?rev=985313&r1=985312&r2=985313&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/tool/CDInfosTool.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/tool/CDInfosTool.java Fri Aug 13 18:19:16 2010
@@ -21,6 +21,7 @@ import java.io.BufferedWriter;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.util.ArrayList;
+import java.util.Collection;
import java.util.List;
import java.util.Locale;
import java.util.Scanner;
@@ -41,6 +42,7 @@ import org.apache.hadoop.io.LongWritable
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.SequenceFile.Reader;
import org.apache.hadoop.io.SequenceFile.Sorter;
+import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
@@ -130,7 +132,9 @@ public final class CDInfosTool {
* @param descriptions List of attribute's descriptions
* @throws IOException
*/
- private static void importDescriptions(FileSystem fs, Configuration conf, Path outpath, List<String> descriptions)
+ private static void importDescriptions(FileSystem fs,
+ Configuration conf, Path outpath,
+ Collection<String> descriptions)
throws IOException {
Sorter sorter = new Sorter(fs, LongWritable.class, Text.class, conf);
@@ -140,8 +144,8 @@ public final class CDInfosTool {
sorter.merge(outfiles, output);
// import the descriptions
- LongWritable key = new LongWritable();
- Text value = new Text();
+ Writable key = new LongWritable();
+ Writable value = new Text();
Reader reader = new Reader(fs, output, conf);
while (reader.next(key, value)) {
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/tool/ToolCombiner.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/tool/ToolCombiner.java?rev=985313&r1=985312&r2=985313&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/tool/ToolCombiner.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/tool/ToolCombiner.java Fri Aug 13 18:19:16 2010
@@ -42,17 +42,12 @@ public class ToolCombiner extends Reduce
private Descriptors descriptors;
- /* (non-Javadoc)
- * @see org.apache.hadoop.mapreduce.Reducer#reduce(java.lang.Object, java.lang.Iterable, org.apache.hadoop.mapreduce.Reducer.Context)
- */
@Override
- protected void reduce(LongWritable key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
+ protected void reduce(LongWritable key, Iterable<Text> values, Context context)
+ throws IOException, InterruptedException {
context.write(key, new Text(createDescription((int) key.get(), values.iterator())));
}
- /* (non-Javadoc)
- * @see org.apache.hadoop.mapreduce.Reducer#setup(org.apache.hadoop.mapreduce.Reducer.Context)
- */
@Override
protected void setup(Context context) throws IOException, InterruptedException {
super.setup(context);
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/tool/ToolMapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/tool/ToolMapper.java?rev=985313&r1=985312&r2=985313&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/tool/ToolMapper.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/tool/ToolMapper.java Fri Aug 13 18:19:16 2010
@@ -19,6 +19,7 @@ package org.apache.mahout.ga.watchmaker.
import java.io.IOException;
import java.util.ArrayList;
+import java.util.Collection;
import java.util.List;
import java.util.StringTokenizer;
@@ -51,9 +52,6 @@ public class ToolMapper extends Mapper<L
private Descriptors descriptors;
- /* (non-Javadoc)
- * @see org.apache.hadoop.mapreduce.Mapper#map(java.lang.Object, java.lang.Object, org.apache.hadoop.mapreduce.Mapper.Context)
- */
@Override
protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
extractAttributes(value, attributes);
@@ -71,9 +69,6 @@ public class ToolMapper extends Mapper<L
}
}
- /* (non-Javadoc)
- * @see org.apache.hadoop.mapreduce.Mapper#setup(org.apache.hadoop.mapreduce.Mapper.Context)
- */
@Override
protected void setup(Context context) throws IOException, InterruptedException {
super.setup(context);
@@ -95,11 +90,8 @@ public class ToolMapper extends Mapper<L
/**
* Extract attribute values from the input Text. The attributes are separated by a comma ','. Skips ignored
* attributes.
- *
- * @param value
- * @param attributes
*/
- static void extractAttributes(Text value, List<String> attributes) {
+ static void extractAttributes(Text value, Collection<String> attributes) {
StringTokenizer tokenizer = new StringTokenizer(value.toString(), ",");
attributes.clear();
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/tool/ToolReducer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/tool/ToolReducer.java?rev=985313&r1=985312&r2=985313&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/tool/ToolReducer.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/tool/ToolReducer.java Fri Aug 13 18:19:16 2010
@@ -42,17 +42,11 @@ public class ToolReducer extends Reducer
private final Set<String> distinct = new HashSet<String>();
- /* (non-Javadoc)
- * @see org.apache.hadoop.mapreduce.Reducer#reduce(java.lang.Object, java.lang.Iterable, org.apache.hadoop.mapreduce.Reducer.Context)
- */
@Override
protected void reduce(LongWritable key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
context.write(key, new Text(combineDescriptions((int) key.get(), values.iterator())));
}
- /* (non-Javadoc)
- * @see org.apache.hadoop.mapreduce.Reducer#setup(org.apache.hadoop.mapreduce.Reducer.Context)
- */
@Override
protected void setup(Context context) throws IOException, InterruptedException {
super.setup(context);