You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by sr...@apache.org on 2008/08/25 17:59:36 UTC

svn commit: r688768 - in /lucene/mahout/trunk/examples/src: main/java/org/apache/mahout/cf/taste/example/grouplens/ main/java/org/apache/mahout/classifier/bayes/ main/java/org/apache/mahout/clustering/syntheticcontrol/canopy/ main/java/org/apache/mahou...

Author: srowen
Date: Mon Aug 25 08:59:35 2008
New Revision: 688768

URL: http://svn.apache.org/viewvc?rev=688768&view=rev
Log:
Fix inadvertent changes to WikipediaDatasetCreatorDriver, and apply previous set of adjustments (move to SLF4J logging, remove "Exception", etc.) to examples code too

Modified:
    lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/grouplens/GroupLensRecommenderEvaluatorRunner.java
    lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/PrepareTwentyNewsgroups.java
    lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/TestClassifier.java
    lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/TrainClassifier.java
    lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/WikipediaDatasetCreator.java
    lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/WikipediaDatasetCreatorDriver.java
    lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/WikipediaDatasetCreatorMapper.java
    lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/WikipediaDatasetCreatorReducer.java
    lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/WikipediaXmlSplitter.java
    lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/canopy/InputDriver.java
    lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/canopy/Job.java
    lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/canopy/OutputDriver.java
    lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/kmeans/Job.java
    lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/kmeans/OutputDriver.java
    lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/InputDriver.java
    lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/Job.java
    lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/OutputDriver.java
    lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/OutputMapper.java
    lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/CDGA.java
    lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/tool/CDInfosTool.java
    lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/CDMutationTest.java
    lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/CDRuleTest.java
    lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/hadoop/CDReducerTest.java
    lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/tool/CDInfosToolTest.java

Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/grouplens/GroupLensRecommenderEvaluatorRunner.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/grouplens/GroupLensRecommenderEvaluatorRunner.java?rev=688768&r1=688767&r2=688768&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/grouplens/GroupLensRecommenderEvaluatorRunner.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/grouplens/GroupLensRecommenderEvaluatorRunner.java Mon Aug 25 08:59:35 2008
@@ -20,6 +20,8 @@
 import org.apache.mahout.cf.taste.common.TasteException;
 import org.apache.mahout.cf.taste.eval.RecommenderEvaluator;
 import org.apache.mahout.cf.taste.impl.eval.AverageAbsoluteDifferenceRecommenderEvaluator;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 import java.io.IOException;
 
@@ -29,6 +31,8 @@
  */
 public final class GroupLensRecommenderEvaluatorRunner {
 
+  private static final Logger log = LoggerFactory.getLogger(GroupLensRecommenderEvaluatorRunner.class);
+
   private GroupLensRecommenderEvaluatorRunner() {
     // do nothing
   }
@@ -39,7 +43,7 @@
                                                  new GroupLensDataModel(),
                                                  0.9,
                                                  0.1);
-    System.out.println(evaluation);
+    log.info(String.valueOf(evaluation));
   }
 
 }

Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/PrepareTwentyNewsgroups.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/PrepareTwentyNewsgroups.java?rev=688768&r1=688767&r2=688768&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/PrepareTwentyNewsgroups.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/PrepareTwentyNewsgroups.java Mon Aug 25 08:59:35 2008
@@ -21,10 +21,12 @@
 import org.apache.commons.cli.OptionBuilder;
 import org.apache.commons.cli.Options;
 import org.apache.commons.cli.PosixParser;
+import org.apache.commons.cli.ParseException;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.mahout.classifier.BayesFileFormatter;
 
 import java.io.File;
+import java.io.IOException;
 import java.nio.charset.Charset;
 
 
@@ -38,7 +40,8 @@
 public class PrepareTwentyNewsgroups {
 
   @SuppressWarnings("static-access")
-  public static void main(String[] args) throws Exception {
+  public static void main(String[] args) throws IOException, ParseException,
+      ClassNotFoundException, InstantiationException, IllegalAccessException {
     Options options = new Options();
     Option parentOpt = OptionBuilder.withLongOpt("parent").isRequired().hasArg().withDescription("Parent dir containing the newsgroups").create("p");
     options.addOption(parentOpt);

Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/TestClassifier.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/TestClassifier.java?rev=688768&r1=688767&r2=688768&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/TestClassifier.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/TestClassifier.java Mon Aug 25 08:59:35 2008
@@ -22,6 +22,7 @@
 import org.apache.commons.cli.OptionBuilder;
 import org.apache.commons.cli.Options;
 import org.apache.commons.cli.PosixParser;
+import org.apache.commons.cli.ParseException;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.mapred.JobConf;
@@ -34,6 +35,8 @@
 import org.apache.mahout.classifier.cbayes.CBayesModel;
 import org.apache.mahout.common.Classifier;
 import org.apache.mahout.common.Model;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 import java.io.BufferedReader;
 import java.io.File;
@@ -46,9 +49,15 @@
 
 public class TestClassifier {
 
+  private static final Logger log = LoggerFactory.getLogger(TestClassifier.class);
+
+  private TestClassifier() {
+    // do nothing
+  }
+
   @SuppressWarnings({ "static-access", "unchecked" })
   public static void main(String[] args) throws IOException,
-      ClassNotFoundException, IllegalAccessException, InstantiationException {
+      ClassNotFoundException, IllegalAccessException, InstantiationException, ParseException {
     Options options = new Options();
     Option pathOpt = OptionBuilder.withLongOpt("path").isRequired().hasArg()
         .withDescription("The local file system path").create("p");
@@ -72,113 +81,105 @@
         .hasArg().withDescription("Type of classifier").create("type");
     options.addOption(typeOpt);
 
-    CommandLine cmdLine;
-    try {
-      PosixParser parser = new PosixParser();
-      cmdLine = parser.parse(options, args);
-      SequenceFileModelReader reader = new SequenceFileModelReader();
-      JobConf conf = new JobConf(TestClassifier.class);
-
-      Map<String, Path> modelPaths = new HashMap<String, Path>();
-      String modelBasePath = cmdLine.getOptionValue(pathOpt.getOpt());
-      modelPaths.put("sigma_j", new Path(modelBasePath + "/trainer-weights/Sigma_j/part-*"));
-      modelPaths.put("sigma_k", new Path(modelBasePath + "/trainer-weights/Sigma_k/part-*"));
-      modelPaths.put("sigma_kSigma_j", new Path(modelBasePath + "/trainer-weights/Sigma_kSigma_j/part-*"));
-      modelPaths.put("thetaNormalizer", new Path(modelBasePath + "/trainer-thetaNormalizer/part-*"));
-      modelPaths.put("weight", new Path(modelBasePath + "/trainer-tfIdf/trainer-tfIdf/part-*"));
-
-      FileSystem fs = FileSystem.get(conf);
-
-      System.out.println("Loading model from: " + modelPaths);
-
-      Model model = null;
-      Classifier classifier = null;
-      
-      String classifierType = cmdLine.getOptionValue(typeOpt.getOpt());
-      
-      if (classifierType.equalsIgnoreCase("bayes")) {
-        System.out.println("Testing Bayes Classifier");
-        model = new BayesModel();
-        classifier = new BayesClassifier();
-      } else if (classifierType.equalsIgnoreCase("cbayes")) {
-        System.out.println("Testing Complementary Bayes Classifier");
-        model = new CBayesModel();
-        classifier = new CBayesClassifier();
-      }
-     
-      model = reader.loadModel(model, fs, modelPaths, conf);
+    PosixParser parser = new PosixParser();
+    CommandLine cmdLine = parser.parse(options, args);
+    SequenceFileModelReader reader = new SequenceFileModelReader();
+    JobConf conf = new JobConf(TestClassifier.class);
+
+    Map<String, Path> modelPaths = new HashMap<String, Path>();
+    String modelBasePath = cmdLine.getOptionValue(pathOpt.getOpt());
+    modelPaths.put("sigma_j", new Path(modelBasePath + "/trainer-weights/Sigma_j/part-*"));
+    modelPaths.put("sigma_k", new Path(modelBasePath + "/trainer-weights/Sigma_k/part-*"));
+    modelPaths.put("sigma_kSigma_j", new Path(modelBasePath + "/trainer-weights/Sigma_kSigma_j/part-*"));
+    modelPaths.put("thetaNormalizer", new Path(modelBasePath + "/trainer-thetaNormalizer/part-*"));
+    modelPaths.put("weight", new Path(modelBasePath + "/trainer-tfIdf/trainer-tfIdf/part-*"));
+
+    FileSystem fs = FileSystem.get(conf);
+
+    log.info("Loading model from: {}", modelPaths);
+
+    Model model = null;
+    Classifier classifier = null;
+
+    String classifierType = cmdLine.getOptionValue(typeOpt.getOpt());
+
+    if (classifierType.equalsIgnoreCase("bayes")) {
+      log.info("Testing Bayes Classifier");
+      model = new BayesModel();
+      classifier = new BayesClassifier();
+    } else if (classifierType.equalsIgnoreCase("cbayes")) {
+      log.info("Testing Complementary Bayes Classifier");
+      model = new CBayesModel();
+      classifier = new CBayesClassifier();
+    }
 
-      System.out.println("Done loading model: # labels: "
-          + model.getLabels().size());
+    model = reader.loadModel(model, fs, modelPaths, conf);
 
-      System.out.println("Done generating Model ");
+    log.info("Done loading model: # labels: {}", model.getLabels().size());
 
-     
+    log.info("Done generating Model");
 
-      String defaultCat = "unknown";
-      if (cmdLine.hasOption(defaultCatOpt.getOpt())) {
-        defaultCat = cmdLine.getOptionValue(defaultCatOpt.getOpt());
-      }
+    String defaultCat = "unknown";
+    if (cmdLine.hasOption(defaultCatOpt.getOpt())) {
+      defaultCat = cmdLine.getOptionValue(defaultCatOpt.getOpt());
+    }
 
-      String encoding = "UTF-8";
-      if (cmdLine.hasOption(encodingOpt.getOpt())) {
-        encoding = cmdLine.getOptionValue(encodingOpt.getOpt());
-      }
-      Analyzer analyzer = null;
-      if (cmdLine.hasOption(analyzerOpt.getOpt())) {
-        String className = cmdLine.getOptionValue(analyzerOpt.getOpt());
-        Class clazz = Class.forName(className);
-        analyzer = (Analyzer) clazz.newInstance();
-      }
-      if (analyzer == null) {
-        analyzer = new StandardAnalyzer();
-      }
-      int gramSize = 1;
-      if (cmdLine.hasOption(gramSizeOpt.getOpt())) {
-        gramSize = Integer.parseInt(cmdLine
-            .getOptionValue(gramSizeOpt.getOpt()));
+    String encoding = "UTF-8";
+    if (cmdLine.hasOption(encodingOpt.getOpt())) {
+      encoding = cmdLine.getOptionValue(encodingOpt.getOpt());
+    }
+    Analyzer analyzer = null;
+    if (cmdLine.hasOption(analyzerOpt.getOpt())) {
+      String className = cmdLine.getOptionValue(analyzerOpt.getOpt());
+      Class clazz = Class.forName(className);
+      analyzer = (Analyzer) clazz.newInstance();
+    }
+    if (analyzer == null) {
+      analyzer = new StandardAnalyzer();
+    }
+    // TODO srowen says analyzer is never used?
+    int gramSize = 1;
+    if (cmdLine.hasOption(gramSizeOpt.getOpt())) {
+      gramSize = Integer.parseInt(cmdLine
+          .getOptionValue(gramSizeOpt.getOpt()));
 
-      }
+    }
 
-      String testDirPath = cmdLine.getOptionValue(dirOpt.getOpt());
-      File dir = new File(testDirPath);
-      File[] subdirs = dir.listFiles();
-
-      ResultAnalyzer resultAnalyzer = new ResultAnalyzer(model.getLabels());
-
-      if (subdirs != null) {
-        for (int loop = 0; loop < subdirs.length; loop++) {
-
-          String correctLabel = subdirs[loop].getName().split(".txt")[0];
-          System.out.print(correctLabel);
-          BufferedReader fileReader = new BufferedReader(new InputStreamReader(
-              new FileInputStream(subdirs[loop].getPath()), encoding));
-          String line;
-          while ((line = fileReader.readLine()) != null) {
-            
-            Map<String, List<String>> document = Model.generateNGrams(line, gramSize);
-            for (String labelName : document.keySet()) {
-              List<String> strings = document.get(labelName);
-              ClassifierResult classifiedLabel = classifier.classify(model,
-                  strings.toArray(new String[strings.size()]),
-                  defaultCat);
-              resultAnalyzer.addInstance(correctLabel, classifiedLabel);
-            }
+    String testDirPath = cmdLine.getOptionValue(dirOpt.getOpt());
+    File dir = new File(testDirPath);
+    File[] subdirs = dir.listFiles();
+
+    ResultAnalyzer resultAnalyzer = new ResultAnalyzer(model.getLabels());
+
+    if (subdirs != null) {
+      for (int loop = 0; loop < subdirs.length; loop++) {
+
+        String correctLabel = subdirs[loop].getName().split(".txt")[0];
+        BufferedReader fileReader = new BufferedReader(new InputStreamReader(
+            new FileInputStream(subdirs[loop].getPath()), encoding));
+        String line;
+        while ((line = fileReader.readLine()) != null) {
+
+          Map<String, List<String>> document = Model.generateNGrams(line, gramSize);
+          for (String labelName : document.keySet()) {
+            List<String> strings = document.get(labelName);
+            ClassifierResult classifiedLabel = classifier.classify(model,
+                strings.toArray(new String[strings.size()]),
+                defaultCat);
+            resultAnalyzer.addInstance(correctLabel, classifiedLabel);
           }
-          System.out.println("\t"
-              + resultAnalyzer.getConfusionMatrix().getAccuracy(correctLabel)
-              + "\t"
-              + resultAnalyzer.getConfusionMatrix().getCorrect(correctLabel)
-              + "/"
-              + resultAnalyzer.getConfusionMatrix().getTotal(correctLabel));
-
         }
+        log.info("{}\t{}\t{}/{}", new Object[] {
+            correctLabel,
+            resultAnalyzer.getConfusionMatrix().getAccuracy(correctLabel),
+            resultAnalyzer.getConfusionMatrix().getCorrect(correctLabel),
+            resultAnalyzer.getConfusionMatrix().getTotal(correctLabel)
+          });
 
       }
-      System.out.println(resultAnalyzer.summarize());
 
-    } catch (Exception exp) {
-      exp.printStackTrace(System.err);
     }
+    log.info(resultAnalyzer.summarize());
+
   }
 }

Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/TrainClassifier.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/TrainClassifier.java?rev=688768&r1=688767&r2=688768&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/TrainClassifier.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/TrainClassifier.java Mon Aug 25 08:59:35 2008
@@ -23,6 +23,8 @@
 import org.apache.commons.cli.PosixParser;
 import org.apache.commons.cli.ParseException;
 import org.apache.mahout.classifier.cbayes.CBayesDriver;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 import java.io.IOException;
 
@@ -48,6 +50,8 @@
  */
 public class TrainClassifier {
 
+  private static final Logger log = LoggerFactory.getLogger(TrainClassifier.class);
+
   public void trainNaiveBayes(String dir, String outputDir, int gramSize) throws IOException {
     BayesDriver.runJob(dir, outputDir, gramSize);
   }
@@ -78,11 +82,11 @@
     if (train == true){
       String classifierType = cmdLine.getOptionValue(typeOpt.getOpt());
       if(classifierType.equalsIgnoreCase("bayes")){
-        System.out.println("Training Bayes Classifier");
+        log.info("Training Bayes Classifier");
         tn.trainNaiveBayes(cmdLine.getOptionValue(inputDirOpt.getOpt()), cmdLine.getOptionValue(outputOpt.getOpt()), Integer.parseInt(cmdLine.getOptionValue(gramSizeOpt.getOpt())));
 
       } else if(classifierType.equalsIgnoreCase("cbayes")) {
-        System.out.println("Training Complementary Bayes Classifier");
+        log.info("Training Complementary Bayes Classifier");
         //setup the HDFS and copy the files there, then run the trainer
         tn.trainCNaiveBayes(cmdLine.getOptionValue(inputDirOpt.getOpt()), cmdLine.getOptionValue(outputOpt.getOpt()), Integer.parseInt(cmdLine.getOptionValue(gramSizeOpt.getOpt())));
       }

Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/WikipediaDatasetCreator.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/WikipediaDatasetCreator.java?rev=688768&r1=688767&r2=688768&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/WikipediaDatasetCreator.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/WikipediaDatasetCreator.java Mon Aug 25 08:59:35 2008
@@ -22,18 +22,15 @@
 import org.apache.commons.cli.OptionBuilder;
 import org.apache.commons.cli.Options;
 import org.apache.commons.cli.PosixParser;
+import org.apache.commons.cli.ParseException;
 
 import java.io.IOException;
 
-/**
- * 
- * 
- */
 public class WikipediaDatasetCreator {
 
   @SuppressWarnings("static-access")
   public static void main(String[] args) throws IOException,
-      ClassNotFoundException, IllegalAccessException, InstantiationException {
+      ClassNotFoundException, IllegalAccessException, InstantiationException, ParseException {
     Options options = new Options();
     Option dirInputPathOpt = OptionBuilder.withLongOpt("dirInputPath").isRequired().hasArg()
         .withDescription("The input Directory Path").create("i");
@@ -45,18 +42,14 @@
         .withDescription("Location of the Countries File").create("c");
     options.addOption(countriesFileOpt);
     
-    CommandLine cmdLine;
-    try {
-      PosixParser parser = new PosixParser();
-      cmdLine = parser.parse(options, args);
-
-      String dirInputPath = cmdLine.getOptionValue(dirInputPathOpt.getOpt());
-      String dirOutputPath = cmdLine.getOptionValue(dirOutputPathOpt.getOpt());
-      String countriesFile = cmdLine.getOptionValue(countriesFileOpt.getOpt());
-
-      WikipediaDatasetCreatorDriver.runJob(dirInputPath, dirOutputPath, countriesFile);
-    } catch (Exception exp) {
-      exp.printStackTrace(System.err);
-    }
+    PosixParser parser = new PosixParser();
+    CommandLine cmdLine = parser.parse(options, args);
+
+    String dirInputPath = cmdLine.getOptionValue(dirInputPathOpt.getOpt());
+    String dirOutputPath = cmdLine.getOptionValue(dirOutputPathOpt.getOpt());
+    String countriesFile = cmdLine.getOptionValue(countriesFileOpt.getOpt());
+
+    WikipediaDatasetCreatorDriver.runJob(dirInputPath, dirOutputPath, countriesFile);
+
   }
 }

Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/WikipediaDatasetCreatorDriver.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/WikipediaDatasetCreatorDriver.java?rev=688768&r1=688767&r2=688768&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/WikipediaDatasetCreatorDriver.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/WikipediaDatasetCreatorDriver.java Mon Aug 25 08:59:35 2008
@@ -23,27 +23,31 @@
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.mapred.JobClient;
 import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.FileInputFormat;
+import org.apache.hadoop.mapred.FileOutputFormat;
 
 import java.io.BufferedReader;
 import java.io.FileInputStream;
 import java.io.InputStreamReader;
+import java.io.IOException;
 import java.util.Set;
 import java.util.HashSet;
 
 /**
  * Create and run the Bayes Trainer.
- *
  */
 public class WikipediaDatasetCreatorDriver {
+
   /**
    * Takes in two arguments:
    * <ol>
    * <li>The input {@link org.apache.hadoop.fs.Path} where the input documents live</li>
-   * <li>The output {@link org.apache.hadoop.fs.Path} where to write the {@link org.apache.mahout.classifier.bayes.BayesModel} as a {@link org.apache.hadoop.io.SequenceFile}</li>
+   * <li>The output {@link org.apache.hadoop.fs.Path} where to write the
+   * {@link org.apache.mahout.classifier.bayes.BayesModel} as a {@link org.apache.hadoop.io.SequenceFile}</li>
    * </ol>
    * @param args The args
    */
-  public static void main(String[] args) {
+  public static void main(String[] args) throws IOException {
     String input = args[0];
     String output = args[1];
     String countriesFile = args[2];
@@ -56,10 +60,8 @@
    *
    * @param input            the input pathname String
    * @param output           the output pathname String
-
    */
-  @SuppressWarnings({ "deprecation" })
-  public static void runJob(String input, String output, String countriesFile) {
+  public static void runJob(String input, String output, String countriesFile) throws IOException {
     JobClient client = new JobClient();
     JobConf conf = new JobConf(WikipediaDatasetCreatorDriver.class);
 
@@ -69,9 +71,9 @@
     conf.setOutputKeyClass(Text.class);
     conf.setOutputValueClass(Text.class);
 
-    conf.setInputPath(new Path(input));
+    FileInputFormat.setInputPaths(conf, new Path(input));
     Path outPath = new Path(output);
-    conf.setOutputPath(outPath);
+    FileOutputFormat.setOutputPath(conf, outPath);
 
     conf.setMapperClass(WikipediaDatasetCreatorMapper.class);
     conf.setNumMapTasks(100);
@@ -79,37 +81,34 @@
     //conf.setCombinerClass(WikipediaDatasetCreatorReducer.class);
     conf.setReducerClass(WikipediaDatasetCreatorReducer.class);
     conf.setOutputFormat(WikipediaDatasetCreatorOutputFormat.class);
-    conf.set("io.serializations", "org.apache.hadoop.io.serializer.JavaSerialization,org.apache.hadoop.io.serializer.WritableSerialization"); // Dont ever forget this. People should keep track of how hadoop conf parameters and make or break a piece of code
+    conf.set("io.serializations",
+             "org.apache.hadoop.io.serializer.JavaSerialization,org.apache.hadoop.io.serializer.WritableSerialization");
+    // Dont ever forget this. People should keep track of how hadoop conf parameters and make or break a piece of code
+
+    FileSystem dfs = FileSystem.get(conf);
+    if (dfs.exists(outPath))
+      dfs.delete(outPath, true);
+
+    Set<String> countries= new HashSet<String>();
+
+
+    BufferedReader reader = new BufferedReader(new InputStreamReader(
+        new FileInputStream(countriesFile), "UTF-8"));
+    String line;
+    while((line = reader.readLine())!=null){
+      countries.add(line);
+    }
+    reader.close();
 
+    DefaultStringifier<Set<String>> setStringifier = new DefaultStringifier<Set<String>>(conf,GenericsUtil.getClass(countries));
+
+    String countriesString = setStringifier.toString(countries);
+
+    conf.set("wikipedia.countries", countriesString);
+
+    client.setConf(conf);
+    JobClient.runJob(conf);
 
-    try {
-      FileSystem dfs = FileSystem.get(conf);
-      if (dfs.exists(outPath))
-        dfs.delete(outPath, true);
-
-      Set<String> countries= new HashSet<String>();
-
-
-      BufferedReader reader = new BufferedReader(new InputStreamReader(
-          new FileInputStream(countriesFile), "UTF-8"));
-      String line;
-      while((line = reader.readLine())!=null){
-        countries.add(line);
-      }
-      reader.close();
-      
-      DefaultStringifier<Set<String>> setStringifier = new DefaultStringifier<Set<String>>(conf,GenericsUtil.getClass(countries));
-
-      String countriesString = setStringifier.toString(countries);  
-
-      conf.set("wikipedia.countries", countriesString);
-      
-      client.setConf(conf);  
-      JobClient.runJob(conf);      
-   
-    } catch (Exception e) {
-      throw new RuntimeException(e);
-    }
     
   }
 }

Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/WikipediaDatasetCreatorMapper.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/WikipediaDatasetCreatorMapper.java?rev=688768&r1=688767&r2=688768&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/WikipediaDatasetCreatorMapper.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/WikipediaDatasetCreatorMapper.java Mon Aug 25 08:59:35 2008
@@ -42,18 +42,15 @@
     Mapper<Text, Text, Text, Text> {
 
   static Set<String> countries = null;
-
   
-  @SuppressWarnings("deprecation")
   public void map(Text key, Text value,
       OutputCollector<Text, Text> output, Reporter reporter)
       throws IOException {
     String document = value.toString();
     Analyzer analyzer = new StandardAnalyzer();
     StringBuilder contents = new StringBuilder();
-    
-    
-    HashSet<String> categories = new HashSet<String>(findAllCategories(document));
+
+    Set<String> categories = new HashSet<String>(findAllCategories(document));
     
     String country = getCountry(categories);
     
@@ -63,9 +60,8 @@
       while(true){
         Token token = stream.next();
         if(token==null) break;
-        contents.append(token.termText()).append(" ");
+        contents.append(token.termBuffer(), 0, token.termLength()).append(' ');
       }
-      //System.err.println(country+"\t"+contents.toString());
       output.collect(new Text(country.replace(" ","_")), new Text(contents.toString()));
     }
   }
@@ -104,12 +100,12 @@
   
   @Override
   public void configure(JobConf job) {
-    try
-    {
+    try {
       if(countries ==null){
         countries = new HashSet<String>();
 
-        DefaultStringifier<Set<String>> setStringifier = new DefaultStringifier<Set<String>>(job,GenericsUtil.getClass(countries));
+        DefaultStringifier<Set<String>> setStringifier =
+            new DefaultStringifier<Set<String>>(job,GenericsUtil.getClass(countries));
 
         String countriesString = setStringifier.toString(countries);  
         countriesString = job.get("wikipedia.countries", countriesString);
@@ -117,10 +113,8 @@
         countries = setStringifier.fromString(countriesString);
         
       }
-    }
-    catch(IOException ex){
-      
-      ex.printStackTrace();
+    } catch(IOException ex){
+      throw new RuntimeException(ex);
     }
   }
 }

Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/WikipediaDatasetCreatorReducer.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/WikipediaDatasetCreatorReducer.java?rev=688768&r1=688767&r2=688768&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/WikipediaDatasetCreatorReducer.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/WikipediaDatasetCreatorReducer.java Mon Aug 25 08:59:35 2008
@@ -25,12 +25,10 @@
 import java.io.IOException;
 import java.util.Iterator;
 
-
 /**
  *  Can also be used as a local Combiner
  *
- **/
-
+ */
 public class WikipediaDatasetCreatorReducer extends MapReduceBase implements Reducer<Text, Text, Text, Text> {
   public void reduce(Text key, Iterator<Text> values, OutputCollector<Text, Text> output, Reporter reporter) throws IOException {
     //Key is label,word, value is the number of times we've seen this label word per local node.  Output is the same

Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/WikipediaXmlSplitter.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/WikipediaXmlSplitter.java?rev=688768&r1=688767&r2=688768&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/WikipediaXmlSplitter.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/WikipediaXmlSplitter.java Mon Aug 25 08:59:35 2008
@@ -22,6 +22,7 @@
 import org.apache.commons.cli.OptionBuilder;
 import org.apache.commons.cli.Options;
 import org.apache.commons.cli.PosixParser;
+import org.apache.commons.cli.ParseException;
 
 import java.io.BufferedReader;
 import java.io.BufferedWriter;
@@ -30,12 +31,13 @@
 import java.io.FileOutputStream;
 import java.io.InputStreamReader;
 import java.io.OutputStreamWriter;
+import java.io.IOException;
 import java.text.DecimalFormat;
 
 public class WikipediaXmlSplitter {
 
   @SuppressWarnings("static-access")
-  public static void main(String[] args) throws Exception {
+  public static void main(String[] args) throws IOException, ParseException {
     Options options = new Options();
     Option dumpFileOpt = OptionBuilder.withLongOpt("dumpfile").isRequired().hasArg().withDescription("The path to the wikipedia dump file").create("d");
     options.addOption(dumpFileOpt);
@@ -43,89 +45,83 @@
     options.addOption(outputDirOpt);
     Option chunkSizeOpt = OptionBuilder.withLongOpt("chunkSize").isRequired().hasArg().withDescription("the Size of chunk in Megabytes").create("c");
     options.addOption(chunkSizeOpt);
-    CommandLine cmdLine;
-    try {
-      PosixParser parser = new PosixParser();
-      cmdLine = parser.parse(options, args);
-
-      String dumpFilePath = cmdLine.getOptionValue(dumpFileOpt.getOpt());
-      String outputDirPath = cmdLine.getOptionValue(outputDirOpt.getOpt());
-      int chunkSize = 1024 * 1024 * Integer.parseInt(cmdLine.getOptionValue(chunkSizeOpt.getOpt()));
-      
-      BufferedReader dumpReader = new BufferedReader(new InputStreamReader(
-          new FileInputStream(dumpFilePath), "UTF-8"));
-
-      File dir = new File(outputDirPath);
-      dir.getPath();
-      
-
-      String header = ""
-          + "<mediawiki xmlns=\"http://www.mediawiki.org/xml/export-0.3/\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xsi:schemaLocation=\"http://www.mediawiki.org/xml/export-0.3/ http://www.mediawiki.org/xml/export-0.3.xsd\" version=\"0.3\" xml:lang=\"en\">\n"
-          + "  <siteinfo>\n" + "<sitename>Wikipedia</sitename>\n"
-          + "    <base>http://en.wikipedia.org/wiki/Main_Page</base>\n"
-          + "    <generator>MediaWiki 1.13alpha</generator>\n"
-          + "    <case>first-letter</case>\n" 
-          + "    <namespaces>\n"
-          + "      <namespace key=\"-2\">Media</namespace>\n"
-          + "      <namespace key=\"-1\">Special</namespace>\n"
-          + "      <namespace key=\"0\" />\n"
-          + "      <namespace key=\"1\">Talk</namespace>\n"
-          + "      <namespace key=\"2\">User</namespace>\n"
-          + "      <namespace key=\"3\">User talk</namespace>\n"
-          + "      <namespace key=\"4\">Wikipedia</namespace>\n"
-          + "      <namespace key=\"5\">Wikipedia talk</namespace>\n"
-          + "      <namespace key=\"6\">Image</namespace>\n"
-          + "      <namespace key=\"7\">Image talk</namespace>\n"
-          + "      <namespace key=\"8\">MediaWiki</namespace>\n"
-          + "      <namespace key=\"9\">MediaWiki talk</namespace>\n"
-          + "      <namespace key=\"10\">Template</namespace>\n"
-          + "      <namespace key=\"11\">Template talk</namespace>\n"
-          + "      <namespace key=\"12\">Help</namespace>\n"
-          + "      <namespace key=\"13\">Help talk</namespace>\n"
-          + "      <namespace key=\"14\">Category</namespace>\n"
-          + "      <namespace key=\"15\">Category talk</namespace>\n"
-          + "      <namespace key=\"100\">Portal</namespace>\n"
-          + "      <namespace key=\"101\">Portal talk</namespace>\n"
-          + "    </namespaces>\n" 
-          + "  </siteinfo>\n";
-      String thisLine;
-      StringBuilder content = new StringBuilder();
-      content.append(header);
-      Integer filenumber = new Integer(0);
-      DecimalFormat decimalFormatter = new DecimalFormat("0000");
-      while ((thisLine = dumpReader.readLine()) != null) 
-      {
-        boolean end = false;
-        if(thisLine.trim().startsWith("<page>")){
-          while(thisLine.trim().startsWith("</page>")==false){
-            content.append(thisLine).append("\n"); 
-            if ((thisLine = dumpReader.readLine()) == null){
-              end=true;
-              break;
-            }
-          }
+
+    PosixParser parser = new PosixParser();
+    CommandLine cmdLine = parser.parse(options, args);
+
+    String dumpFilePath = cmdLine.getOptionValue(dumpFileOpt.getOpt());
+    String outputDirPath = cmdLine.getOptionValue(outputDirOpt.getOpt());
+    int chunkSize = 1024 * 1024 * Integer.parseInt(cmdLine.getOptionValue(chunkSizeOpt.getOpt()));
+
+    BufferedReader dumpReader = new BufferedReader(new InputStreamReader(
+        new FileInputStream(dumpFilePath), "UTF-8"));
+
+    File dir = new File(outputDirPath);
+    dir.getPath();
+
+
+    String header =
+          "<mediawiki xmlns=\"http://www.mediawiki.org/xml/export-0.3/\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xsi:schemaLocation=\"http://www.mediawiki.org/xml/export-0.3/ http://www.mediawiki.org/xml/export-0.3.xsd\" version=\"0.3\" xml:lang=\"en\">\n"
+        + "  <siteinfo>\n" + "<sitename>Wikipedia</sitename>\n"
+        + "    <base>http://en.wikipedia.org/wiki/Main_Page</base>\n"
+        + "    <generator>MediaWiki 1.13alpha</generator>\n"
+        + "    <case>first-letter</case>\n"
+        + "    <namespaces>\n"
+        + "      <namespace key=\"-2\">Media</namespace>\n"
+        + "      <namespace key=\"-1\">Special</namespace>\n"
+        + "      <namespace key=\"0\" />\n"
+        + "      <namespace key=\"1\">Talk</namespace>\n"
+        + "      <namespace key=\"2\">User</namespace>\n"
+        + "      <namespace key=\"3\">User talk</namespace>\n"
+        + "      <namespace key=\"4\">Wikipedia</namespace>\n"
+        + "      <namespace key=\"5\">Wikipedia talk</namespace>\n"
+        + "      <namespace key=\"6\">Image</namespace>\n"
+        + "      <namespace key=\"7\">Image talk</namespace>\n"
+        + "      <namespace key=\"8\">MediaWiki</namespace>\n"
+        + "      <namespace key=\"9\">MediaWiki talk</namespace>\n"
+        + "      <namespace key=\"10\">Template</namespace>\n"
+        + "      <namespace key=\"11\">Template talk</namespace>\n"
+        + "      <namespace key=\"12\">Help</namespace>\n"
+        + "      <namespace key=\"13\">Help talk</namespace>\n"
+        + "      <namespace key=\"14\">Category</namespace>\n"
+        + "      <namespace key=\"15\">Category talk</namespace>\n"
+        + "      <namespace key=\"100\">Portal</namespace>\n"
+        + "      <namespace key=\"101\">Portal talk</namespace>\n"
+        + "    </namespaces>\n"
+        + "  </siteinfo>\n";
+    String thisLine;
+    StringBuilder content = new StringBuilder();
+    content.append(header);
+    int filenumber = 0;
+    DecimalFormat decimalFormatter = new DecimalFormat("0000");
+    while ((thisLine = dumpReader.readLine()) != null)
+    {
+      boolean end = false;
+      if(thisLine.trim().startsWith("<page>")){
+        while(thisLine.trim().startsWith("</page>")==false){
           content.append(thisLine).append("\n");
-          
-          if(content.length()>chunkSize || end){
-            content.append("</mediawiki>");
-            filenumber++;
-            
-            BufferedWriter chunkWriter = new BufferedWriter(new OutputStreamWriter(
-                new FileOutputStream(dir.getPath()+"/chunk-"+ decimalFormatter.format(filenumber)+".xml"), "UTF-8"));
-            
-            chunkWriter.write(content.toString(), 0, content.length());
-            chunkWriter.close();
-            
-            content = new StringBuilder();
-            
-            content.append(header);
-            
+          if ((thisLine = dumpReader.readLine()) == null){
+            end=true;
+            break;
           }
         }
-      } 
+        content.append(thisLine).append("\n");
+
+        if(content.length()>chunkSize || end){
+          content.append("</mediawiki>");
+          filenumber++;
 
-    } catch (Exception exp) {
-      exp.printStackTrace(System.err);
+          BufferedWriter chunkWriter = new BufferedWriter(new OutputStreamWriter(
+              new FileOutputStream(dir.getPath()+"/chunk-"+ decimalFormatter.format(filenumber)+".xml"), "UTF-8"));
+
+          chunkWriter.write(content.toString(), 0, content.length());
+          chunkWriter.close();
+
+          content = new StringBuilder();
+          content.append(header);
+        }
+      }
     }
+
   }
 }

Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/canopy/InputDriver.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/canopy/InputDriver.java?rev=688768&r1=688767&r2=688768&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/canopy/InputDriver.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/canopy/InputDriver.java Mon Aug 25 08:59:35 2008
@@ -29,7 +29,7 @@
 
 public class InputDriver {
 
-  public static void main(String[] args) throws Exception {
+  public static void main(String[] args) throws IOException {
     runJob(args[0], args[1]);
   }
 

Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/canopy/Job.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/canopy/Job.java?rev=688768&r1=688767&r2=688768&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/canopy/Job.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/canopy/Job.java Mon Aug 25 08:59:35 2008
@@ -27,7 +27,7 @@
 
 public class Job {
 
-  public static void main(String[] args) throws Exception {
+  public static void main(String[] args) throws IOException {
     if (args.length == 5) {
       String input = args[0];
       String output = args[1];

Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/canopy/OutputDriver.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/canopy/OutputDriver.java?rev=688768&r1=688767&r2=688768&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/canopy/OutputDriver.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/canopy/OutputDriver.java Mon Aug 25 08:59:35 2008
@@ -30,7 +30,7 @@
 
 public class OutputDriver {
 
-  public static void main(String[] args) throws Exception {
+  public static void main(String[] args) throws IOException {
     runJob(args[0], args[1]);
   }
 

Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/kmeans/Job.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/kmeans/Job.java?rev=688768&r1=688767&r2=688768&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/kmeans/Job.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/kmeans/Job.java Mon Aug 25 08:59:35 2008
@@ -29,7 +29,7 @@
 
 public class Job {
 
-  public static void main(String[] args) throws Exception {
+  public static void main(String[] args) throws IOException {
     if (args.length == 6) {
       String input = args[0];
       String output = args[1];

Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/kmeans/OutputDriver.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/kmeans/OutputDriver.java?rev=688768&r1=688767&r2=688768&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/kmeans/OutputDriver.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/kmeans/OutputDriver.java Mon Aug 25 08:59:35 2008
@@ -30,7 +30,7 @@
 
 public class OutputDriver {
 
-  public static void main(String[] args) throws Exception {
+  public static void main(String[] args) throws IOException {
     runJob(args[0], args[1]);
   }
 

Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/InputDriver.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/InputDriver.java?rev=688768&r1=688767&r2=688768&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/InputDriver.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/InputDriver.java Mon Aug 25 08:59:35 2008
@@ -29,7 +29,7 @@
 
 public class InputDriver {
 
-  public static void main(String[] args) throws Exception {
+  public static void main(String[] args) throws IOException {
     runJob(args[0], args[1]);
   }
 

Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/Job.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/Job.java?rev=688768&r1=688767&r2=688768&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/Job.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/Job.java Mon Aug 25 08:59:35 2008
@@ -27,7 +27,7 @@
 
 public class Job {
 
-  public static void main(String[] args) throws Exception {
+  public static void main(String[] args) throws IOException {
     if (args.length == 7) {
       String input = args[0];
       String output = args[1];

Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/OutputDriver.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/OutputDriver.java?rev=688768&r1=688767&r2=688768&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/OutputDriver.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/OutputDriver.java Mon Aug 25 08:59:35 2008
@@ -31,7 +31,7 @@
 
 public class OutputDriver {
 
-  public static void main(String[] args) throws Exception {
+  public static void main(String[] args) throws IOException {
     runJob(args[0], args[1]);
   }
 

Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/OutputMapper.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/OutputMapper.java?rev=688768&r1=688767&r2=688768&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/OutputMapper.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/OutputMapper.java Mon Aug 25 08:59:35 2008
@@ -24,12 +24,16 @@
 import org.apache.hadoop.mapred.Reporter;
 import org.apache.mahout.clustering.meanshift.MeanShiftCanopy;
 import org.apache.mahout.matrix.Vector;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 import java.io.IOException;
 
 public class OutputMapper extends MapReduceBase implements
     Mapper<Text, Text, Text, Text> {
 
+  private static final Logger log = LoggerFactory.getLogger(OutputMapper.class);
+
   int clusters = 0;
 
   public void map(Text key, Text values, OutputCollector<Text, Text> output,
@@ -43,7 +47,7 @@
 
   @Override
   public void close() throws IOException {
-    System.out.println("+++ Clusters=" + clusters);
+    log.info("+++ Clusters={}", clusters);
     super.close();
   }
 

Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/CDGA.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/CDGA.java?rev=688768&r1=688767&r2=688768&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/CDGA.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/CDGA.java Mon Aug 25 08:59:35 2008
@@ -32,6 +32,8 @@
 import org.uncommons.watchmaker.framework.operators.EvolutionPipeline;
 import org.uncommons.watchmaker.framework.selection.RouletteWheelSelection;
 import org.uncommons.watchmaker.framework.termination.GenerationCount;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 import java.io.IOException;
 import java.util.ArrayList;
@@ -63,6 +65,8 @@
  */
 public class CDGA {
 
+  private static final Logger log = LoggerFactory.getLogger(CDGA.class);
+
   public static void main(String[] args) throws IOException {
     String dataset = "build/classes/wdbc";
     int target = 1;
@@ -85,7 +89,7 @@
       popSize = Integer.parseInt(args[7]);
       genCount = Integer.parseInt(args[8]);
     } else {
-      System.out.println("Invalid arguments, working with default parameters instead");
+      log.warn("Invalid arguments, working with default parameters instead");
 	  }
 
     long start = System.currentTimeMillis();
@@ -127,7 +131,7 @@
 
     engine.addEvolutionObserver(new EvolutionObserver<Rule>() {
       public void populationUpdate(PopulationData<Rule> data) {
-        System.out.println("Generation " + data.getGenerationNumber());
+        log.info("Generation {}", data.getGenerationNumber());
       }
     });
 
@@ -144,8 +148,8 @@
         inpath, split);
 
     // evaluate the solution over the testing set
-    System.out.println("Best solution fitness (train set) : " + bestTrainFit);
-    System.out.println("Best solution fitness (test set) : " + bestTestFit);
+    log.info("Best solution fitness (train set) : {}", bestTrainFit);
+    log.info("Best solution fitness (test set) : {}", bestTestFit);
   }
 
   static void printElapsedTime(long milli) {
@@ -158,7 +162,6 @@
     long hours = minutes / 60;
     minutes %= 60;
 
-    System.out.println("Elapsed time (Hours:minutes:seconds:milli) : " + hours
-        + ":" + minutes + ":" + seconds + ":" + milli);
+    log.info("Elapsed time (Hours:minutes:seconds:milli) : {}:{}:{}:{}", new Object[] {hours, minutes, seconds, milli});
   }
 }

Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/tool/CDInfosTool.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/tool/CDInfosTool.java?rev=688768&r1=688767&r2=688768&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/tool/CDInfosTool.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/tool/CDInfosTool.java Mon Aug 25 08:59:35 2008
@@ -33,6 +33,8 @@
 import org.apache.mahout.ga.watchmaker.OutputUtils;
 import org.apache.mahout.ga.watchmaker.cd.FileInfoParser;
 import org.apache.mahout.utils.StringUtils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 import java.io.BufferedWriter;
 import java.io.IOException;
@@ -44,10 +46,11 @@
 /**
  * Gathers additional information about a given dataset. Takes a descriptor
  * about the attributes, and generates a description for each one.
- * 
  */
 public class CDInfosTool {
 
+  private static final Logger log = LoggerFactory.getLogger(CDInfosTool.class);
+
   /**
    * Uses Mahout to gather the information about a dataset.
    * 
@@ -199,21 +202,21 @@
   public static void main(String[] args) throws IOException {
     // command-line parameters
     if (args.length == 0) {
-      System.out.println("Usage: CDInfosTool dataset_path");
+      log.warn("Usage: CDInfosTool dataset_path");
       throw new IllegalArgumentException();
     }
 
     FileSystem fs = FileSystem.get(new Configuration());
     Path inpath = new Path(args[0]);
 
-    System.out.println("Loading Descriptors...");
+    log.info("Loading Descriptors...");
     Descriptors descriptors = loadDescriptors(fs, inpath);
 
-    System.out.println("Gathering informations...");
+    log.info("Gathering informations...");
     List<String> descriptions = new ArrayList<String>();
     gatherInfos(descriptors, inpath, descriptions);
 
-    System.out.println("Storing Descriptions...");
+    log.info("Storing Descriptions...");
     storeDescriptions(fs, inpath, descriptors, descriptions);
   }
 }

Modified: lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/CDMutationTest.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/CDMutationTest.java?rev=688768&r1=688767&r2=688768&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/CDMutationTest.java (original)
+++ lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/CDMutationTest.java Mon Aug 25 08:59:35 2008
@@ -30,7 +30,7 @@
   private MockDataSet mock;
 
   @Override
-  protected void setUp() throws Exception {
+  protected void setUp() {
     rng = new MersenneTwisterRNG();
     mock = new MockDataSet(rng, 100);
   }

Modified: lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/CDRuleTest.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/CDRuleTest.java?rev=688768&r1=688767&r2=688768&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/CDRuleTest.java (original)
+++ lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/CDRuleTest.java Mon Aug 25 08:59:35 2008
@@ -49,7 +49,7 @@
   }
 
   @Override
-  protected void setUp() throws Exception {
+  protected void setUp() {
     rng = new MersenneTwisterRNG();
     mock = new MockDataSet(rng, 50);
   }

Modified: lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/hadoop/CDReducerTest.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/hadoop/CDReducerTest.java?rev=688768&r1=688767&r2=688768&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/hadoop/CDReducerTest.java (original)
+++ lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/hadoop/CDReducerTest.java Mon Aug 25 08:59:35 2008
@@ -20,7 +20,7 @@
   private CDFitness expected;
 
   @Override
-  protected void setUp() throws Exception {
+  protected void setUp() {
     // generate random evaluatons and calculate expectations
     evaluations = new ArrayList<CDFitness>();
     Random rng = new Random();

Modified: lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/tool/CDInfosToolTest.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/tool/CDInfosToolTest.java?rev=688768&r1=688767&r2=688768&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/tool/CDInfosToolTest.java (original)
+++ lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/tool/CDInfosToolTest.java Mon Aug 25 08:59:35 2008
@@ -39,7 +39,7 @@
   Random rng;
 
   @Override
-  protected void setUp() throws Exception {
+  protected void setUp() {
     rng = new MersenneTwisterRNG();
   }