You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by sr...@apache.org on 2008/08/25 17:59:36 UTC
svn commit: r688768 - in /lucene/mahout/trunk/examples/src:
main/java/org/apache/mahout/cf/taste/example/grouplens/
main/java/org/apache/mahout/classifier/bayes/
main/java/org/apache/mahout/clustering/syntheticcontrol/canopy/
main/java/org/apache/mahou...
Author: srowen
Date: Mon Aug 25 08:59:35 2008
New Revision: 688768
URL: http://svn.apache.org/viewvc?rev=688768&view=rev
Log:
Fix inadvertent changes to WikipediaDatasetCreatorDriver, and apply previous set of adjustments (move to SLF4J logging, remove "Exception", etc.) to examples code too
Modified:
lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/grouplens/GroupLensRecommenderEvaluatorRunner.java
lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/PrepareTwentyNewsgroups.java
lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/TestClassifier.java
lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/TrainClassifier.java
lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/WikipediaDatasetCreator.java
lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/WikipediaDatasetCreatorDriver.java
lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/WikipediaDatasetCreatorMapper.java
lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/WikipediaDatasetCreatorReducer.java
lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/WikipediaXmlSplitter.java
lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/canopy/InputDriver.java
lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/canopy/Job.java
lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/canopy/OutputDriver.java
lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/kmeans/Job.java
lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/kmeans/OutputDriver.java
lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/InputDriver.java
lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/Job.java
lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/OutputDriver.java
lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/OutputMapper.java
lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/CDGA.java
lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/tool/CDInfosTool.java
lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/CDMutationTest.java
lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/CDRuleTest.java
lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/hadoop/CDReducerTest.java
lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/tool/CDInfosToolTest.java
Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/grouplens/GroupLensRecommenderEvaluatorRunner.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/grouplens/GroupLensRecommenderEvaluatorRunner.java?rev=688768&r1=688767&r2=688768&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/grouplens/GroupLensRecommenderEvaluatorRunner.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/grouplens/GroupLensRecommenderEvaluatorRunner.java Mon Aug 25 08:59:35 2008
@@ -20,6 +20,8 @@
import org.apache.mahout.cf.taste.common.TasteException;
import org.apache.mahout.cf.taste.eval.RecommenderEvaluator;
import org.apache.mahout.cf.taste.impl.eval.AverageAbsoluteDifferenceRecommenderEvaluator;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
import java.io.IOException;
@@ -29,6 +31,8 @@
*/
public final class GroupLensRecommenderEvaluatorRunner {
+ private static final Logger log = LoggerFactory.getLogger(GroupLensRecommenderEvaluatorRunner.class);
+
private GroupLensRecommenderEvaluatorRunner() {
// do nothing
}
@@ -39,7 +43,7 @@
new GroupLensDataModel(),
0.9,
0.1);
- System.out.println(evaluation);
+ log.info(String.valueOf(evaluation));
}
}
Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/PrepareTwentyNewsgroups.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/PrepareTwentyNewsgroups.java?rev=688768&r1=688767&r2=688768&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/PrepareTwentyNewsgroups.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/PrepareTwentyNewsgroups.java Mon Aug 25 08:59:35 2008
@@ -21,10 +21,12 @@
import org.apache.commons.cli.OptionBuilder;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.PosixParser;
+import org.apache.commons.cli.ParseException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.mahout.classifier.BayesFileFormatter;
import java.io.File;
+import java.io.IOException;
import java.nio.charset.Charset;
@@ -38,7 +40,8 @@
public class PrepareTwentyNewsgroups {
@SuppressWarnings("static-access")
- public static void main(String[] args) throws Exception {
+ public static void main(String[] args) throws IOException, ParseException,
+ ClassNotFoundException, InstantiationException, IllegalAccessException {
Options options = new Options();
Option parentOpt = OptionBuilder.withLongOpt("parent").isRequired().hasArg().withDescription("Parent dir containing the newsgroups").create("p");
options.addOption(parentOpt);
Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/TestClassifier.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/TestClassifier.java?rev=688768&r1=688767&r2=688768&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/TestClassifier.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/TestClassifier.java Mon Aug 25 08:59:35 2008
@@ -22,6 +22,7 @@
import org.apache.commons.cli.OptionBuilder;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.PosixParser;
+import org.apache.commons.cli.ParseException;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.JobConf;
@@ -34,6 +35,8 @@
import org.apache.mahout.classifier.cbayes.CBayesModel;
import org.apache.mahout.common.Classifier;
import org.apache.mahout.common.Model;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
import java.io.BufferedReader;
import java.io.File;
@@ -46,9 +49,15 @@
public class TestClassifier {
+ private static final Logger log = LoggerFactory.getLogger(TestClassifier.class);
+
+ private TestClassifier() {
+ // do nothing
+ }
+
@SuppressWarnings({ "static-access", "unchecked" })
public static void main(String[] args) throws IOException,
- ClassNotFoundException, IllegalAccessException, InstantiationException {
+ ClassNotFoundException, IllegalAccessException, InstantiationException, ParseException {
Options options = new Options();
Option pathOpt = OptionBuilder.withLongOpt("path").isRequired().hasArg()
.withDescription("The local file system path").create("p");
@@ -72,113 +81,105 @@
.hasArg().withDescription("Type of classifier").create("type");
options.addOption(typeOpt);
- CommandLine cmdLine;
- try {
- PosixParser parser = new PosixParser();
- cmdLine = parser.parse(options, args);
- SequenceFileModelReader reader = new SequenceFileModelReader();
- JobConf conf = new JobConf(TestClassifier.class);
-
- Map<String, Path> modelPaths = new HashMap<String, Path>();
- String modelBasePath = cmdLine.getOptionValue(pathOpt.getOpt());
- modelPaths.put("sigma_j", new Path(modelBasePath + "/trainer-weights/Sigma_j/part-*"));
- modelPaths.put("sigma_k", new Path(modelBasePath + "/trainer-weights/Sigma_k/part-*"));
- modelPaths.put("sigma_kSigma_j", new Path(modelBasePath + "/trainer-weights/Sigma_kSigma_j/part-*"));
- modelPaths.put("thetaNormalizer", new Path(modelBasePath + "/trainer-thetaNormalizer/part-*"));
- modelPaths.put("weight", new Path(modelBasePath + "/trainer-tfIdf/trainer-tfIdf/part-*"));
-
- FileSystem fs = FileSystem.get(conf);
-
- System.out.println("Loading model from: " + modelPaths);
-
- Model model = null;
- Classifier classifier = null;
-
- String classifierType = cmdLine.getOptionValue(typeOpt.getOpt());
-
- if (classifierType.equalsIgnoreCase("bayes")) {
- System.out.println("Testing Bayes Classifier");
- model = new BayesModel();
- classifier = new BayesClassifier();
- } else if (classifierType.equalsIgnoreCase("cbayes")) {
- System.out.println("Testing Complementary Bayes Classifier");
- model = new CBayesModel();
- classifier = new CBayesClassifier();
- }
-
- model = reader.loadModel(model, fs, modelPaths, conf);
+ PosixParser parser = new PosixParser();
+ CommandLine cmdLine = parser.parse(options, args);
+ SequenceFileModelReader reader = new SequenceFileModelReader();
+ JobConf conf = new JobConf(TestClassifier.class);
+
+ Map<String, Path> modelPaths = new HashMap<String, Path>();
+ String modelBasePath = cmdLine.getOptionValue(pathOpt.getOpt());
+ modelPaths.put("sigma_j", new Path(modelBasePath + "/trainer-weights/Sigma_j/part-*"));
+ modelPaths.put("sigma_k", new Path(modelBasePath + "/trainer-weights/Sigma_k/part-*"));
+ modelPaths.put("sigma_kSigma_j", new Path(modelBasePath + "/trainer-weights/Sigma_kSigma_j/part-*"));
+ modelPaths.put("thetaNormalizer", new Path(modelBasePath + "/trainer-thetaNormalizer/part-*"));
+ modelPaths.put("weight", new Path(modelBasePath + "/trainer-tfIdf/trainer-tfIdf/part-*"));
+
+ FileSystem fs = FileSystem.get(conf);
+
+ log.info("Loading model from: {}", modelPaths);
+
+ Model model = null;
+ Classifier classifier = null;
+
+ String classifierType = cmdLine.getOptionValue(typeOpt.getOpt());
+
+ if (classifierType.equalsIgnoreCase("bayes")) {
+ log.info("Testing Bayes Classifier");
+ model = new BayesModel();
+ classifier = new BayesClassifier();
+ } else if (classifierType.equalsIgnoreCase("cbayes")) {
+ log.info("Testing Complementary Bayes Classifier");
+ model = new CBayesModel();
+ classifier = new CBayesClassifier();
+ }
- System.out.println("Done loading model: # labels: "
- + model.getLabels().size());
+ model = reader.loadModel(model, fs, modelPaths, conf);
- System.out.println("Done generating Model ");
+ log.info("Done loading model: # labels: {}", model.getLabels().size());
-
+ log.info("Done generating Model");
- String defaultCat = "unknown";
- if (cmdLine.hasOption(defaultCatOpt.getOpt())) {
- defaultCat = cmdLine.getOptionValue(defaultCatOpt.getOpt());
- }
+ String defaultCat = "unknown";
+ if (cmdLine.hasOption(defaultCatOpt.getOpt())) {
+ defaultCat = cmdLine.getOptionValue(defaultCatOpt.getOpt());
+ }
- String encoding = "UTF-8";
- if (cmdLine.hasOption(encodingOpt.getOpt())) {
- encoding = cmdLine.getOptionValue(encodingOpt.getOpt());
- }
- Analyzer analyzer = null;
- if (cmdLine.hasOption(analyzerOpt.getOpt())) {
- String className = cmdLine.getOptionValue(analyzerOpt.getOpt());
- Class clazz = Class.forName(className);
- analyzer = (Analyzer) clazz.newInstance();
- }
- if (analyzer == null) {
- analyzer = new StandardAnalyzer();
- }
- int gramSize = 1;
- if (cmdLine.hasOption(gramSizeOpt.getOpt())) {
- gramSize = Integer.parseInt(cmdLine
- .getOptionValue(gramSizeOpt.getOpt()));
+ String encoding = "UTF-8";
+ if (cmdLine.hasOption(encodingOpt.getOpt())) {
+ encoding = cmdLine.getOptionValue(encodingOpt.getOpt());
+ }
+ Analyzer analyzer = null;
+ if (cmdLine.hasOption(analyzerOpt.getOpt())) {
+ String className = cmdLine.getOptionValue(analyzerOpt.getOpt());
+ Class clazz = Class.forName(className);
+ analyzer = (Analyzer) clazz.newInstance();
+ }
+ if (analyzer == null) {
+ analyzer = new StandardAnalyzer();
+ }
+ // TODO srowen says analyzer is never used?
+ int gramSize = 1;
+ if (cmdLine.hasOption(gramSizeOpt.getOpt())) {
+ gramSize = Integer.parseInt(cmdLine
+ .getOptionValue(gramSizeOpt.getOpt()));
- }
+ }
- String testDirPath = cmdLine.getOptionValue(dirOpt.getOpt());
- File dir = new File(testDirPath);
- File[] subdirs = dir.listFiles();
-
- ResultAnalyzer resultAnalyzer = new ResultAnalyzer(model.getLabels());
-
- if (subdirs != null) {
- for (int loop = 0; loop < subdirs.length; loop++) {
-
- String correctLabel = subdirs[loop].getName().split(".txt")[0];
- System.out.print(correctLabel);
- BufferedReader fileReader = new BufferedReader(new InputStreamReader(
- new FileInputStream(subdirs[loop].getPath()), encoding));
- String line;
- while ((line = fileReader.readLine()) != null) {
-
- Map<String, List<String>> document = Model.generateNGrams(line, gramSize);
- for (String labelName : document.keySet()) {
- List<String> strings = document.get(labelName);
- ClassifierResult classifiedLabel = classifier.classify(model,
- strings.toArray(new String[strings.size()]),
- defaultCat);
- resultAnalyzer.addInstance(correctLabel, classifiedLabel);
- }
+ String testDirPath = cmdLine.getOptionValue(dirOpt.getOpt());
+ File dir = new File(testDirPath);
+ File[] subdirs = dir.listFiles();
+
+ ResultAnalyzer resultAnalyzer = new ResultAnalyzer(model.getLabels());
+
+ if (subdirs != null) {
+ for (int loop = 0; loop < subdirs.length; loop++) {
+
+ String correctLabel = subdirs[loop].getName().split(".txt")[0];
+ BufferedReader fileReader = new BufferedReader(new InputStreamReader(
+ new FileInputStream(subdirs[loop].getPath()), encoding));
+ String line;
+ while ((line = fileReader.readLine()) != null) {
+
+ Map<String, List<String>> document = Model.generateNGrams(line, gramSize);
+ for (String labelName : document.keySet()) {
+ List<String> strings = document.get(labelName);
+ ClassifierResult classifiedLabel = classifier.classify(model,
+ strings.toArray(new String[strings.size()]),
+ defaultCat);
+ resultAnalyzer.addInstance(correctLabel, classifiedLabel);
}
- System.out.println("\t"
- + resultAnalyzer.getConfusionMatrix().getAccuracy(correctLabel)
- + "\t"
- + resultAnalyzer.getConfusionMatrix().getCorrect(correctLabel)
- + "/"
- + resultAnalyzer.getConfusionMatrix().getTotal(correctLabel));
-
}
+ log.info("{}\t{}\t{}/{}", new Object[] {
+ correctLabel,
+ resultAnalyzer.getConfusionMatrix().getAccuracy(correctLabel),
+ resultAnalyzer.getConfusionMatrix().getCorrect(correctLabel),
+ resultAnalyzer.getConfusionMatrix().getTotal(correctLabel)
+ });
}
- System.out.println(resultAnalyzer.summarize());
- } catch (Exception exp) {
- exp.printStackTrace(System.err);
}
+ log.info(resultAnalyzer.summarize());
+
}
}
Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/TrainClassifier.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/TrainClassifier.java?rev=688768&r1=688767&r2=688768&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/TrainClassifier.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/TrainClassifier.java Mon Aug 25 08:59:35 2008
@@ -23,6 +23,8 @@
import org.apache.commons.cli.PosixParser;
import org.apache.commons.cli.ParseException;
import org.apache.mahout.classifier.cbayes.CBayesDriver;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
import java.io.IOException;
@@ -48,6 +50,8 @@
*/
public class TrainClassifier {
+ private static final Logger log = LoggerFactory.getLogger(TrainClassifier.class);
+
public void trainNaiveBayes(String dir, String outputDir, int gramSize) throws IOException {
BayesDriver.runJob(dir, outputDir, gramSize);
}
@@ -78,11 +82,11 @@
if (train == true){
String classifierType = cmdLine.getOptionValue(typeOpt.getOpt());
if(classifierType.equalsIgnoreCase("bayes")){
- System.out.println("Training Bayes Classifier");
+ log.info("Training Bayes Classifier");
tn.trainNaiveBayes(cmdLine.getOptionValue(inputDirOpt.getOpt()), cmdLine.getOptionValue(outputOpt.getOpt()), Integer.parseInt(cmdLine.getOptionValue(gramSizeOpt.getOpt())));
} else if(classifierType.equalsIgnoreCase("cbayes")) {
- System.out.println("Training Complementary Bayes Classifier");
+ log.info("Training Complementary Bayes Classifier");
//setup the HDFS and copy the files there, then run the trainer
tn.trainCNaiveBayes(cmdLine.getOptionValue(inputDirOpt.getOpt()), cmdLine.getOptionValue(outputOpt.getOpt()), Integer.parseInt(cmdLine.getOptionValue(gramSizeOpt.getOpt())));
}
Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/WikipediaDatasetCreator.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/WikipediaDatasetCreator.java?rev=688768&r1=688767&r2=688768&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/WikipediaDatasetCreator.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/WikipediaDatasetCreator.java Mon Aug 25 08:59:35 2008
@@ -22,18 +22,15 @@
import org.apache.commons.cli.OptionBuilder;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.PosixParser;
+import org.apache.commons.cli.ParseException;
import java.io.IOException;
-/**
- *
- *
- */
public class WikipediaDatasetCreator {
@SuppressWarnings("static-access")
public static void main(String[] args) throws IOException,
- ClassNotFoundException, IllegalAccessException, InstantiationException {
+ ClassNotFoundException, IllegalAccessException, InstantiationException, ParseException {
Options options = new Options();
Option dirInputPathOpt = OptionBuilder.withLongOpt("dirInputPath").isRequired().hasArg()
.withDescription("The input Directory Path").create("i");
@@ -45,18 +42,14 @@
.withDescription("Location of the Countries File").create("c");
options.addOption(countriesFileOpt);
- CommandLine cmdLine;
- try {
- PosixParser parser = new PosixParser();
- cmdLine = parser.parse(options, args);
-
- String dirInputPath = cmdLine.getOptionValue(dirInputPathOpt.getOpt());
- String dirOutputPath = cmdLine.getOptionValue(dirOutputPathOpt.getOpt());
- String countriesFile = cmdLine.getOptionValue(countriesFileOpt.getOpt());
-
- WikipediaDatasetCreatorDriver.runJob(dirInputPath, dirOutputPath, countriesFile);
- } catch (Exception exp) {
- exp.printStackTrace(System.err);
- }
+ PosixParser parser = new PosixParser();
+ CommandLine cmdLine = parser.parse(options, args);
+
+ String dirInputPath = cmdLine.getOptionValue(dirInputPathOpt.getOpt());
+ String dirOutputPath = cmdLine.getOptionValue(dirOutputPathOpt.getOpt());
+ String countriesFile = cmdLine.getOptionValue(countriesFileOpt.getOpt());
+
+ WikipediaDatasetCreatorDriver.runJob(dirInputPath, dirOutputPath, countriesFile);
+
}
}
Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/WikipediaDatasetCreatorDriver.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/WikipediaDatasetCreatorDriver.java?rev=688768&r1=688767&r2=688768&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/WikipediaDatasetCreatorDriver.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/WikipediaDatasetCreatorDriver.java Mon Aug 25 08:59:35 2008
@@ -23,27 +23,31 @@
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.FileInputFormat;
+import org.apache.hadoop.mapred.FileOutputFormat;
import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.InputStreamReader;
+import java.io.IOException;
import java.util.Set;
import java.util.HashSet;
/**
* Create and run the Bayes Trainer.
- *
*/
public class WikipediaDatasetCreatorDriver {
+
/**
* Takes in two arguments:
* <ol>
* <li>The input {@link org.apache.hadoop.fs.Path} where the input documents live</li>
- * <li>The output {@link org.apache.hadoop.fs.Path} where to write the {@link org.apache.mahout.classifier.bayes.BayesModel} as a {@link org.apache.hadoop.io.SequenceFile}</li>
+ * <li>The output {@link org.apache.hadoop.fs.Path} where to write the
+ * {@link org.apache.mahout.classifier.bayes.BayesModel} as a {@link org.apache.hadoop.io.SequenceFile}</li>
* </ol>
* @param args The args
*/
- public static void main(String[] args) {
+ public static void main(String[] args) throws IOException {
String input = args[0];
String output = args[1];
String countriesFile = args[2];
@@ -56,10 +60,8 @@
*
* @param input the input pathname String
* @param output the output pathname String
-
*/
- @SuppressWarnings({ "deprecation" })
- public static void runJob(String input, String output, String countriesFile) {
+ public static void runJob(String input, String output, String countriesFile) throws IOException {
JobClient client = new JobClient();
JobConf conf = new JobConf(WikipediaDatasetCreatorDriver.class);
@@ -69,9 +71,9 @@
conf.setOutputKeyClass(Text.class);
conf.setOutputValueClass(Text.class);
- conf.setInputPath(new Path(input));
+ FileInputFormat.setInputPaths(conf, new Path(input));
Path outPath = new Path(output);
- conf.setOutputPath(outPath);
+ FileOutputFormat.setOutputPath(conf, outPath);
conf.setMapperClass(WikipediaDatasetCreatorMapper.class);
conf.setNumMapTasks(100);
@@ -79,37 +81,34 @@
//conf.setCombinerClass(WikipediaDatasetCreatorReducer.class);
conf.setReducerClass(WikipediaDatasetCreatorReducer.class);
conf.setOutputFormat(WikipediaDatasetCreatorOutputFormat.class);
- conf.set("io.serializations", "org.apache.hadoop.io.serializer.JavaSerialization,org.apache.hadoop.io.serializer.WritableSerialization"); // Dont ever forget this. People should keep track of how hadoop conf parameters and make or break a piece of code
+ conf.set("io.serializations",
+ "org.apache.hadoop.io.serializer.JavaSerialization,org.apache.hadoop.io.serializer.WritableSerialization");
+ // Dont ever forget this. People should keep track of how hadoop conf parameters and make or break a piece of code
+
+ FileSystem dfs = FileSystem.get(conf);
+ if (dfs.exists(outPath))
+ dfs.delete(outPath, true);
+
+ Set<String> countries= new HashSet<String>();
+
+
+ BufferedReader reader = new BufferedReader(new InputStreamReader(
+ new FileInputStream(countriesFile), "UTF-8"));
+ String line;
+ while((line = reader.readLine())!=null){
+ countries.add(line);
+ }
+ reader.close();
+ DefaultStringifier<Set<String>> setStringifier = new DefaultStringifier<Set<String>>(conf,GenericsUtil.getClass(countries));
+
+ String countriesString = setStringifier.toString(countries);
+
+ conf.set("wikipedia.countries", countriesString);
+
+ client.setConf(conf);
+ JobClient.runJob(conf);
- try {
- FileSystem dfs = FileSystem.get(conf);
- if (dfs.exists(outPath))
- dfs.delete(outPath, true);
-
- Set<String> countries= new HashSet<String>();
-
-
- BufferedReader reader = new BufferedReader(new InputStreamReader(
- new FileInputStream(countriesFile), "UTF-8"));
- String line;
- while((line = reader.readLine())!=null){
- countries.add(line);
- }
- reader.close();
-
- DefaultStringifier<Set<String>> setStringifier = new DefaultStringifier<Set<String>>(conf,GenericsUtil.getClass(countries));
-
- String countriesString = setStringifier.toString(countries);
-
- conf.set("wikipedia.countries", countriesString);
-
- client.setConf(conf);
- JobClient.runJob(conf);
-
- } catch (Exception e) {
- throw new RuntimeException(e);
- }
}
}
Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/WikipediaDatasetCreatorMapper.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/WikipediaDatasetCreatorMapper.java?rev=688768&r1=688767&r2=688768&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/WikipediaDatasetCreatorMapper.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/WikipediaDatasetCreatorMapper.java Mon Aug 25 08:59:35 2008
@@ -42,18 +42,15 @@
Mapper<Text, Text, Text, Text> {
static Set<String> countries = null;
-
- @SuppressWarnings("deprecation")
public void map(Text key, Text value,
OutputCollector<Text, Text> output, Reporter reporter)
throws IOException {
String document = value.toString();
Analyzer analyzer = new StandardAnalyzer();
StringBuilder contents = new StringBuilder();
-
-
- HashSet<String> categories = new HashSet<String>(findAllCategories(document));
+
+ Set<String> categories = new HashSet<String>(findAllCategories(document));
String country = getCountry(categories);
@@ -63,9 +60,8 @@
while(true){
Token token = stream.next();
if(token==null) break;
- contents.append(token.termText()).append(" ");
+ contents.append(token.termBuffer(), 0, token.termLength()).append(' ');
}
- //System.err.println(country+"\t"+contents.toString());
output.collect(new Text(country.replace(" ","_")), new Text(contents.toString()));
}
}
@@ -104,12 +100,12 @@
@Override
public void configure(JobConf job) {
- try
- {
+ try {
if(countries ==null){
countries = new HashSet<String>();
- DefaultStringifier<Set<String>> setStringifier = new DefaultStringifier<Set<String>>(job,GenericsUtil.getClass(countries));
+ DefaultStringifier<Set<String>> setStringifier =
+ new DefaultStringifier<Set<String>>(job,GenericsUtil.getClass(countries));
String countriesString = setStringifier.toString(countries);
countriesString = job.get("wikipedia.countries", countriesString);
@@ -117,10 +113,8 @@
countries = setStringifier.fromString(countriesString);
}
- }
- catch(IOException ex){
-
- ex.printStackTrace();
+ } catch(IOException ex){
+ throw new RuntimeException(ex);
}
}
}
Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/WikipediaDatasetCreatorReducer.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/WikipediaDatasetCreatorReducer.java?rev=688768&r1=688767&r2=688768&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/WikipediaDatasetCreatorReducer.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/WikipediaDatasetCreatorReducer.java Mon Aug 25 08:59:35 2008
@@ -25,12 +25,10 @@
import java.io.IOException;
import java.util.Iterator;
-
/**
* Can also be used as a local Combiner
*
- **/
-
+ */
public class WikipediaDatasetCreatorReducer extends MapReduceBase implements Reducer<Text, Text, Text, Text> {
public void reduce(Text key, Iterator<Text> values, OutputCollector<Text, Text> output, Reporter reporter) throws IOException {
//Key is label,word, value is the number of times we've seen this label word per local node. Output is the same
Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/WikipediaXmlSplitter.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/WikipediaXmlSplitter.java?rev=688768&r1=688767&r2=688768&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/WikipediaXmlSplitter.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/WikipediaXmlSplitter.java Mon Aug 25 08:59:35 2008
@@ -22,6 +22,7 @@
import org.apache.commons.cli.OptionBuilder;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.PosixParser;
+import org.apache.commons.cli.ParseException;
import java.io.BufferedReader;
import java.io.BufferedWriter;
@@ -30,12 +31,13 @@
import java.io.FileOutputStream;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
+import java.io.IOException;
import java.text.DecimalFormat;
public class WikipediaXmlSplitter {
@SuppressWarnings("static-access")
- public static void main(String[] args) throws Exception {
+ public static void main(String[] args) throws IOException, ParseException {
Options options = new Options();
Option dumpFileOpt = OptionBuilder.withLongOpt("dumpfile").isRequired().hasArg().withDescription("The path to the wikipedia dump file").create("d");
options.addOption(dumpFileOpt);
@@ -43,89 +45,83 @@
options.addOption(outputDirOpt);
Option chunkSizeOpt = OptionBuilder.withLongOpt("chunkSize").isRequired().hasArg().withDescription("the Size of chunk in Megabytes").create("c");
options.addOption(chunkSizeOpt);
- CommandLine cmdLine;
- try {
- PosixParser parser = new PosixParser();
- cmdLine = parser.parse(options, args);
-
- String dumpFilePath = cmdLine.getOptionValue(dumpFileOpt.getOpt());
- String outputDirPath = cmdLine.getOptionValue(outputDirOpt.getOpt());
- int chunkSize = 1024 * 1024 * Integer.parseInt(cmdLine.getOptionValue(chunkSizeOpt.getOpt()));
-
- BufferedReader dumpReader = new BufferedReader(new InputStreamReader(
- new FileInputStream(dumpFilePath), "UTF-8"));
-
- File dir = new File(outputDirPath);
- dir.getPath();
-
-
- String header = ""
- + "<mediawiki xmlns=\"http://www.mediawiki.org/xml/export-0.3/\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xsi:schemaLocation=\"http://www.mediawiki.org/xml/export-0.3/ http://www.mediawiki.org/xml/export-0.3.xsd\" version=\"0.3\" xml:lang=\"en\">\n"
- + " <siteinfo>\n" + "<sitename>Wikipedia</sitename>\n"
- + " <base>http://en.wikipedia.org/wiki/Main_Page</base>\n"
- + " <generator>MediaWiki 1.13alpha</generator>\n"
- + " <case>first-letter</case>\n"
- + " <namespaces>\n"
- + " <namespace key=\"-2\">Media</namespace>\n"
- + " <namespace key=\"-1\">Special</namespace>\n"
- + " <namespace key=\"0\" />\n"
- + " <namespace key=\"1\">Talk</namespace>\n"
- + " <namespace key=\"2\">User</namespace>\n"
- + " <namespace key=\"3\">User talk</namespace>\n"
- + " <namespace key=\"4\">Wikipedia</namespace>\n"
- + " <namespace key=\"5\">Wikipedia talk</namespace>\n"
- + " <namespace key=\"6\">Image</namespace>\n"
- + " <namespace key=\"7\">Image talk</namespace>\n"
- + " <namespace key=\"8\">MediaWiki</namespace>\n"
- + " <namespace key=\"9\">MediaWiki talk</namespace>\n"
- + " <namespace key=\"10\">Template</namespace>\n"
- + " <namespace key=\"11\">Template talk</namespace>\n"
- + " <namespace key=\"12\">Help</namespace>\n"
- + " <namespace key=\"13\">Help talk</namespace>\n"
- + " <namespace key=\"14\">Category</namespace>\n"
- + " <namespace key=\"15\">Category talk</namespace>\n"
- + " <namespace key=\"100\">Portal</namespace>\n"
- + " <namespace key=\"101\">Portal talk</namespace>\n"
- + " </namespaces>\n"
- + " </siteinfo>\n";
- String thisLine;
- StringBuilder content = new StringBuilder();
- content.append(header);
- Integer filenumber = new Integer(0);
- DecimalFormat decimalFormatter = new DecimalFormat("0000");
- while ((thisLine = dumpReader.readLine()) != null)
- {
- boolean end = false;
- if(thisLine.trim().startsWith("<page>")){
- while(thisLine.trim().startsWith("</page>")==false){
- content.append(thisLine).append("\n");
- if ((thisLine = dumpReader.readLine()) == null){
- end=true;
- break;
- }
- }
+
+ PosixParser parser = new PosixParser();
+ CommandLine cmdLine = parser.parse(options, args);
+
+ String dumpFilePath = cmdLine.getOptionValue(dumpFileOpt.getOpt());
+ String outputDirPath = cmdLine.getOptionValue(outputDirOpt.getOpt());
+ int chunkSize = 1024 * 1024 * Integer.parseInt(cmdLine.getOptionValue(chunkSizeOpt.getOpt()));
+
+ BufferedReader dumpReader = new BufferedReader(new InputStreamReader(
+ new FileInputStream(dumpFilePath), "UTF-8"));
+
+ File dir = new File(outputDirPath);
+ dir.getPath();
+
+
+ String header =
+ "<mediawiki xmlns=\"http://www.mediawiki.org/xml/export-0.3/\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xsi:schemaLocation=\"http://www.mediawiki.org/xml/export-0.3/ http://www.mediawiki.org/xml/export-0.3.xsd\" version=\"0.3\" xml:lang=\"en\">\n"
+ + " <siteinfo>\n" + "<sitename>Wikipedia</sitename>\n"
+ + " <base>http://en.wikipedia.org/wiki/Main_Page</base>\n"
+ + " <generator>MediaWiki 1.13alpha</generator>\n"
+ + " <case>first-letter</case>\n"
+ + " <namespaces>\n"
+ + " <namespace key=\"-2\">Media</namespace>\n"
+ + " <namespace key=\"-1\">Special</namespace>\n"
+ + " <namespace key=\"0\" />\n"
+ + " <namespace key=\"1\">Talk</namespace>\n"
+ + " <namespace key=\"2\">User</namespace>\n"
+ + " <namespace key=\"3\">User talk</namespace>\n"
+ + " <namespace key=\"4\">Wikipedia</namespace>\n"
+ + " <namespace key=\"5\">Wikipedia talk</namespace>\n"
+ + " <namespace key=\"6\">Image</namespace>\n"
+ + " <namespace key=\"7\">Image talk</namespace>\n"
+ + " <namespace key=\"8\">MediaWiki</namespace>\n"
+ + " <namespace key=\"9\">MediaWiki talk</namespace>\n"
+ + " <namespace key=\"10\">Template</namespace>\n"
+ + " <namespace key=\"11\">Template talk</namespace>\n"
+ + " <namespace key=\"12\">Help</namespace>\n"
+ + " <namespace key=\"13\">Help talk</namespace>\n"
+ + " <namespace key=\"14\">Category</namespace>\n"
+ + " <namespace key=\"15\">Category talk</namespace>\n"
+ + " <namespace key=\"100\">Portal</namespace>\n"
+ + " <namespace key=\"101\">Portal talk</namespace>\n"
+ + " </namespaces>\n"
+ + " </siteinfo>\n";
+ String thisLine;
+ StringBuilder content = new StringBuilder();
+ content.append(header);
+ int filenumber = 0;
+ DecimalFormat decimalFormatter = new DecimalFormat("0000");
+ while ((thisLine = dumpReader.readLine()) != null)
+ {
+ boolean end = false;
+ if(thisLine.trim().startsWith("<page>")){
+ while(thisLine.trim().startsWith("</page>")==false){
content.append(thisLine).append("\n");
-
- if(content.length()>chunkSize || end){
- content.append("</mediawiki>");
- filenumber++;
-
- BufferedWriter chunkWriter = new BufferedWriter(new OutputStreamWriter(
- new FileOutputStream(dir.getPath()+"/chunk-"+ decimalFormatter.format(filenumber)+".xml"), "UTF-8"));
-
- chunkWriter.write(content.toString(), 0, content.length());
- chunkWriter.close();
-
- content = new StringBuilder();
-
- content.append(header);
-
+ if ((thisLine = dumpReader.readLine()) == null){
+ end=true;
+ break;
}
}
- }
+ content.append(thisLine).append("\n");
+
+ if(content.length()>chunkSize || end){
+ content.append("</mediawiki>");
+ filenumber++;
- } catch (Exception exp) {
- exp.printStackTrace(System.err);
+ BufferedWriter chunkWriter = new BufferedWriter(new OutputStreamWriter(
+ new FileOutputStream(dir.getPath()+"/chunk-"+ decimalFormatter.format(filenumber)+".xml"), "UTF-8"));
+
+ chunkWriter.write(content.toString(), 0, content.length());
+ chunkWriter.close();
+
+ content = new StringBuilder();
+ content.append(header);
+ }
+ }
}
+
}
}
Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/canopy/InputDriver.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/canopy/InputDriver.java?rev=688768&r1=688767&r2=688768&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/canopy/InputDriver.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/canopy/InputDriver.java Mon Aug 25 08:59:35 2008
@@ -29,7 +29,7 @@
public class InputDriver {
- public static void main(String[] args) throws Exception {
+ public static void main(String[] args) throws IOException {
runJob(args[0], args[1]);
}
Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/canopy/Job.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/canopy/Job.java?rev=688768&r1=688767&r2=688768&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/canopy/Job.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/canopy/Job.java Mon Aug 25 08:59:35 2008
@@ -27,7 +27,7 @@
public class Job {
- public static void main(String[] args) throws Exception {
+ public static void main(String[] args) throws IOException {
if (args.length == 5) {
String input = args[0];
String output = args[1];
Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/canopy/OutputDriver.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/canopy/OutputDriver.java?rev=688768&r1=688767&r2=688768&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/canopy/OutputDriver.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/canopy/OutputDriver.java Mon Aug 25 08:59:35 2008
@@ -30,7 +30,7 @@
public class OutputDriver {
- public static void main(String[] args) throws Exception {
+ public static void main(String[] args) throws IOException {
runJob(args[0], args[1]);
}
Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/kmeans/Job.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/kmeans/Job.java?rev=688768&r1=688767&r2=688768&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/kmeans/Job.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/kmeans/Job.java Mon Aug 25 08:59:35 2008
@@ -29,7 +29,7 @@
public class Job {
- public static void main(String[] args) throws Exception {
+ public static void main(String[] args) throws IOException {
if (args.length == 6) {
String input = args[0];
String output = args[1];
Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/kmeans/OutputDriver.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/kmeans/OutputDriver.java?rev=688768&r1=688767&r2=688768&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/kmeans/OutputDriver.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/kmeans/OutputDriver.java Mon Aug 25 08:59:35 2008
@@ -30,7 +30,7 @@
public class OutputDriver {
- public static void main(String[] args) throws Exception {
+ public static void main(String[] args) throws IOException {
runJob(args[0], args[1]);
}
Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/InputDriver.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/InputDriver.java?rev=688768&r1=688767&r2=688768&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/InputDriver.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/InputDriver.java Mon Aug 25 08:59:35 2008
@@ -29,7 +29,7 @@
public class InputDriver {
- public static void main(String[] args) throws Exception {
+ public static void main(String[] args) throws IOException {
runJob(args[0], args[1]);
}
Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/Job.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/Job.java?rev=688768&r1=688767&r2=688768&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/Job.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/Job.java Mon Aug 25 08:59:35 2008
@@ -27,7 +27,7 @@
public class Job {
- public static void main(String[] args) throws Exception {
+ public static void main(String[] args) throws IOException {
if (args.length == 7) {
String input = args[0];
String output = args[1];
Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/OutputDriver.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/OutputDriver.java?rev=688768&r1=688767&r2=688768&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/OutputDriver.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/OutputDriver.java Mon Aug 25 08:59:35 2008
@@ -31,7 +31,7 @@
public class OutputDriver {
- public static void main(String[] args) throws Exception {
+ public static void main(String[] args) throws IOException {
runJob(args[0], args[1]);
}
Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/OutputMapper.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/OutputMapper.java?rev=688768&r1=688767&r2=688768&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/OutputMapper.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/OutputMapper.java Mon Aug 25 08:59:35 2008
@@ -24,12 +24,16 @@
import org.apache.hadoop.mapred.Reporter;
import org.apache.mahout.clustering.meanshift.MeanShiftCanopy;
import org.apache.mahout.matrix.Vector;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
import java.io.IOException;
public class OutputMapper extends MapReduceBase implements
Mapper<Text, Text, Text, Text> {
+ private static final Logger log = LoggerFactory.getLogger(OutputMapper.class);
+
int clusters = 0;
public void map(Text key, Text values, OutputCollector<Text, Text> output,
@@ -43,7 +47,7 @@
@Override
public void close() throws IOException {
- System.out.println("+++ Clusters=" + clusters);
+ log.info("+++ Clusters={}", clusters);
super.close();
}
Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/CDGA.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/CDGA.java?rev=688768&r1=688767&r2=688768&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/CDGA.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/CDGA.java Mon Aug 25 08:59:35 2008
@@ -32,6 +32,8 @@
import org.uncommons.watchmaker.framework.operators.EvolutionPipeline;
import org.uncommons.watchmaker.framework.selection.RouletteWheelSelection;
import org.uncommons.watchmaker.framework.termination.GenerationCount;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.util.ArrayList;
@@ -63,6 +65,8 @@
*/
public class CDGA {
+ private static final Logger log = LoggerFactory.getLogger(CDGA.class);
+
public static void main(String[] args) throws IOException {
String dataset = "build/classes/wdbc";
int target = 1;
@@ -85,7 +89,7 @@
popSize = Integer.parseInt(args[7]);
genCount = Integer.parseInt(args[8]);
} else {
- System.out.println("Invalid arguments, working with default parameters instead");
+ log.warn("Invalid arguments, working with default parameters instead");
}
long start = System.currentTimeMillis();
@@ -127,7 +131,7 @@
engine.addEvolutionObserver(new EvolutionObserver<Rule>() {
public void populationUpdate(PopulationData<Rule> data) {
- System.out.println("Generation " + data.getGenerationNumber());
+ log.info("Generation {}", data.getGenerationNumber());
}
});
@@ -144,8 +148,8 @@
inpath, split);
// evaluate the solution over the testing set
- System.out.println("Best solution fitness (train set) : " + bestTrainFit);
- System.out.println("Best solution fitness (test set) : " + bestTestFit);
+ log.info("Best solution fitness (train set) : {}", bestTrainFit);
+ log.info("Best solution fitness (test set) : {}", bestTestFit);
}
static void printElapsedTime(long milli) {
@@ -158,7 +162,6 @@
long hours = minutes / 60;
minutes %= 60;
- System.out.println("Elapsed time (Hours:minutes:seconds:milli) : " + hours
- + ":" + minutes + ":" + seconds + ":" + milli);
+ log.info("Elapsed time (Hours:minutes:seconds:milli) : {}:{}:{}:{}", new Object[] {hours, minutes, seconds, milli});
}
}
Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/tool/CDInfosTool.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/tool/CDInfosTool.java?rev=688768&r1=688767&r2=688768&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/tool/CDInfosTool.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/tool/CDInfosTool.java Mon Aug 25 08:59:35 2008
@@ -33,6 +33,8 @@
import org.apache.mahout.ga.watchmaker.OutputUtils;
import org.apache.mahout.ga.watchmaker.cd.FileInfoParser;
import org.apache.mahout.utils.StringUtils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
import java.io.BufferedWriter;
import java.io.IOException;
@@ -44,10 +46,11 @@
/**
* Gathers additional information about a given dataset. Takes a descriptor
* about the attributes, and generates a description for each one.
- *
*/
public class CDInfosTool {
+ private static final Logger log = LoggerFactory.getLogger(CDInfosTool.class);
+
/**
* Uses Mahout to gather the information about a dataset.
*
@@ -199,21 +202,21 @@
public static void main(String[] args) throws IOException {
// command-line parameters
if (args.length == 0) {
- System.out.println("Usage: CDInfosTool dataset_path");
+ log.warn("Usage: CDInfosTool dataset_path");
throw new IllegalArgumentException();
}
FileSystem fs = FileSystem.get(new Configuration());
Path inpath = new Path(args[0]);
- System.out.println("Loading Descriptors...");
+ log.info("Loading Descriptors...");
Descriptors descriptors = loadDescriptors(fs, inpath);
- System.out.println("Gathering informations...");
+ log.info("Gathering informations...");
List<String> descriptions = new ArrayList<String>();
gatherInfos(descriptors, inpath, descriptions);
- System.out.println("Storing Descriptions...");
+ log.info("Storing Descriptions...");
storeDescriptions(fs, inpath, descriptors, descriptions);
}
}
Modified: lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/CDMutationTest.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/CDMutationTest.java?rev=688768&r1=688767&r2=688768&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/CDMutationTest.java (original)
+++ lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/CDMutationTest.java Mon Aug 25 08:59:35 2008
@@ -30,7 +30,7 @@
private MockDataSet mock;
@Override
- protected void setUp() throws Exception {
+ protected void setUp() {
rng = new MersenneTwisterRNG();
mock = new MockDataSet(rng, 100);
}
Modified: lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/CDRuleTest.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/CDRuleTest.java?rev=688768&r1=688767&r2=688768&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/CDRuleTest.java (original)
+++ lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/CDRuleTest.java Mon Aug 25 08:59:35 2008
@@ -49,7 +49,7 @@
}
@Override
- protected void setUp() throws Exception {
+ protected void setUp() {
rng = new MersenneTwisterRNG();
mock = new MockDataSet(rng, 50);
}
Modified: lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/hadoop/CDReducerTest.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/hadoop/CDReducerTest.java?rev=688768&r1=688767&r2=688768&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/hadoop/CDReducerTest.java (original)
+++ lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/hadoop/CDReducerTest.java Mon Aug 25 08:59:35 2008
@@ -20,7 +20,7 @@
private CDFitness expected;
@Override
- protected void setUp() throws Exception {
+ protected void setUp() {
// generate random evaluatons and calculate expectations
evaluations = new ArrayList<CDFitness>();
Random rng = new Random();
Modified: lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/tool/CDInfosToolTest.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/tool/CDInfosToolTest.java?rev=688768&r1=688767&r2=688768&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/tool/CDInfosToolTest.java (original)
+++ lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/tool/CDInfosToolTest.java Mon Aug 25 08:59:35 2008
@@ -39,7 +39,7 @@
Random rng;
@Override
- protected void setUp() throws Exception {
+ protected void setUp() {
rng = new MersenneTwisterRNG();
}