You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by is...@apache.org on 2009/10/03 20:23:06 UTC

svn commit: r821385 - in /lucene/mahout/trunk: core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/bayes/ core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/cbayes/ core/src/main/java/org/apache/mahout/classifier/bayes/mapreduc...

Author: isabel
Date: Sat Oct  3 18:23:05 2009
New Revision: 821385

URL: http://svn.apache.org/viewvc?rev=821385&view=rev
Log:
MAHOUT-138 - adjusted jobs for bayes and cbayes to use cli.

Added:
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesJob.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/JobExecutor.java
Modified:
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/bayes/BayesDriver.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/bayes/BayesThetaNormalizerDriver.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/cbayes/CBayesDriver.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/cbayes/CBayesNormalizedWeightDriver.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/cbayes/CBayesThetaDriver.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/cbayes/CBayesThetaNormalizerDriver.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesFeatureDriver.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesTfIdfDriver.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesWeightSummerDriver.java
    lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/TrainClassifier.java

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/bayes/BayesDriver.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/bayes/BayesDriver.java?rev=821385&r1=821384&r2=821385&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/bayes/BayesDriver.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/bayes/BayesDriver.java Sat Oct  3 18:23:05 2009
@@ -22,35 +22,31 @@
 import org.apache.hadoop.mapred.JobConf;
 import org.apache.mahout.classifier.bayes.mapreduce.common.BayesFeatureDriver;
 import org.apache.mahout.classifier.bayes.common.BayesParameters;
+import org.apache.mahout.classifier.bayes.mapreduce.common.BayesJob;
 import org.apache.mahout.classifier.bayes.mapreduce.common.BayesTfIdfDriver;
 import org.apache.mahout.classifier.bayes.mapreduce.common.BayesWeightSummerDriver;
+import org.apache.mahout.classifier.bayes.mapreduce.common.JobExecutor;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 import java.io.IOException;
 
 /** Create and run the Bayes Trainer. */
-public class BayesDriver {
+public class BayesDriver implements BayesJob {
 
   private static final Logger log = LoggerFactory.getLogger(BayesDriver.class);
 
-  private BayesDriver() {
-  }
-
   /**
    * Takes in two arguments: <ol> <li>The input {@link org.apache.hadoop.fs.Path} where the input documents live</li>
    * <li>The output {@link org.apache.hadoop.fs.Path} where to write the Model as a
    * {@link org.apache.hadoop.io.SequenceFile}</li> </ol>
    *
    * @param args The args
-   * @throws ClassNotFoundException 
-   * @throws InterruptedException 
+   * @throws Exception in case of job execution problems. 
    */
-  public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
-    String input = args[0];
-    String output = args[1];
-
-    runJob(input, output, new BayesParameters(1));
+  public static void main(String[] args) throws Exception {
+    JobExecutor executor = new JobExecutor();
+    executor.execute(args, new BayesDriver());
   }
 
   /**
@@ -61,7 +57,7 @@
    * @throws ClassNotFoundException 
    * @throws InterruptedException 
    */
-  public static void runJob(String input, String output, BayesParameters params) throws IOException, InterruptedException, ClassNotFoundException {
+  public void runJob(String input, String output, BayesParameters params) throws IOException, InterruptedException, ClassNotFoundException {
     JobConf conf = new JobConf(BayesDriver.class);
     Path outPath = new Path(output);
     FileSystem dfs = FileSystem.get(outPath.toUri(), conf);
@@ -71,22 +67,26 @@
 
     log.info("Reading features...");
     //Read the features in each document normalized by length of each document
-    BayesFeatureDriver.runJob(input, output, params);
+    BayesFeatureDriver feature = new BayesFeatureDriver();
+    feature.runJob(input, output, params);
 
     log.info("Calculating Tf-Idf...");
     //Calculate the TfIdf for each word in each label
-    BayesTfIdfDriver.runJob(input, output, params);
+    BayesTfIdfDriver tfidf = new BayesTfIdfDriver();
+    tfidf.runJob(input, output, params);
 
     log.info("Calculating weight sums for labels and features...");
     //Calculate the Sums of weights for each label, for each feature and for each feature and for each label
-    BayesWeightSummerDriver.runJob(input, output, params);
+    BayesWeightSummerDriver summer = new BayesWeightSummerDriver();
+    summer.runJob(input, output, params);
 
     //Calculate the W_ij = log(Theta) for each label, feature. This step actually generates the complement class
     //CBayesThetaDriver.runJob(input, output);
 
     log.info("Calculating the weight Normalisation factor for each class...");
     //Calculate the normalization factor Sigma_W_ij for each complement class.
-    BayesThetaNormalizerDriver.runJob(input, output, params);
+    BayesThetaNormalizerDriver normalizer = new BayesThetaNormalizerDriver();
+    normalizer.runJob(input, output, params);
 
     //Calculate the normalization factor Sigma_W_ij for each complement class.
     //CBayesNormalizedWeightDriver.runJob(input, output);

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/bayes/BayesThetaNormalizerDriver.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/bayes/BayesThetaNormalizerDriver.java?rev=821385&r1=821384&r2=821385&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/bayes/BayesThetaNormalizerDriver.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/bayes/BayesThetaNormalizerDriver.java Sat Oct  3 18:23:05 2009
@@ -31,6 +31,10 @@
 import org.apache.hadoop.util.GenericsUtil;
 import org.apache.mahout.classifier.bayes.common.BayesParameters;
 import org.apache.mahout.classifier.bayes.io.SequenceFileModelReader;
+import org.apache.mahout.classifier.bayes.mapreduce.common.BayesJob;
+import org.apache.mahout.classifier.bayes.mapreduce.common.JobExecutor;
+import org.apache.mahout.common.CommandLineUtil;
+import org.apache.mahout.common.commandline.DefaultOptionCreator;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -38,25 +42,20 @@
 import java.util.Map;
 
 /** Create and run the Bayes Theta Normalization Step. */
-public class BayesThetaNormalizerDriver {
+public class BayesThetaNormalizerDriver implements BayesJob {
 
   private static final Logger log = LoggerFactory.getLogger(BayesThetaNormalizerDriver.class);
 
-  private BayesThetaNormalizerDriver() {
-  }
-
   /**
    * Takes in two arguments: <ol> <li>The input {@link org.apache.hadoop.fs.Path} where the input documents live</li>
    * <li>The output {@link org.apache.hadoop.fs.Path} where to write the the interim filesas a {@link
    * org.apache.hadoop.io.SequenceFile}</li> </ol>
    *
-   * @param args The args
+   * @param args The args - should contain input and output path.
    */
-  public static void main(String[] args) throws IOException {
-    String input = args[0];
-    String output = args[1];
-
-    runJob(input, output, new BayesParameters(1));
+  public static void main(String[] args) throws Exception {
+    JobExecutor executor = new JobExecutor();
+    executor.execute(args, new BayesThetaNormalizerDriver());
   }
 
   /**
@@ -65,7 +64,7 @@
    * @param input  the input pathname String
    * @param output the output pathname String
    */
-  public static void runJob(String input, String output, BayesParameters params) throws IOException {
+  public void runJob(String input, String output, BayesParameters params) throws IOException {
     JobClient client = new JobClient();
     JobConf conf = new JobConf(BayesThetaNormalizerDriver.class);
 

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/cbayes/CBayesDriver.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/cbayes/CBayesDriver.java?rev=821385&r1=821384&r2=821385&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/cbayes/CBayesDriver.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/cbayes/CBayesDriver.java Sat Oct  3 18:23:05 2009
@@ -22,35 +22,31 @@
 import org.apache.hadoop.mapred.JobConf;
 import org.apache.mahout.classifier.bayes.mapreduce.common.BayesFeatureDriver;
 import org.apache.mahout.classifier.bayes.common.BayesParameters;
+import org.apache.mahout.classifier.bayes.mapreduce.common.BayesJob;
 import org.apache.mahout.classifier.bayes.mapreduce.common.BayesTfIdfDriver;
 import org.apache.mahout.classifier.bayes.mapreduce.common.BayesWeightSummerDriver;
+import org.apache.mahout.classifier.bayes.mapreduce.common.JobExecutor;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 import java.io.IOException;
 
 /** Create and run the Bayes Trainer. */
-public class CBayesDriver {
+public class CBayesDriver implements BayesJob{
 
   private static final Logger log = LoggerFactory.getLogger(CBayesDriver.class);
 
-  private CBayesDriver() {
-  }
-
   /**
    * Takes in two arguments: <ol> <li>The input {@link Path} where the input documents live</li>
    * <li>The output {@link Path} where to write the Model as a
    * {@link org.apache.hadoop.io.SequenceFile}</li> </ol>
    *
-   * @param args The args
-   * @throws ClassNotFoundException 
-   * @throws InterruptedException 
+   * @param args The args - input and output path.
+   * @throws Exception in case of problems during job execution. 
    */
-  public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
-    String input = args[0];
-    String output = args[1];
-
-    runJob(input, output, new BayesParameters(1));
+  public static void main(String[] args) throws Exception {
+    JobExecutor executor = new JobExecutor();
+    executor.execute(args, new CBayesDriver());
   }
 
   /**
@@ -61,7 +57,7 @@
    * @throws ClassNotFoundException 
    * @throws InterruptedException 
    */
-  public static void runJob(String input, String output, BayesParameters params) throws IOException, InterruptedException, ClassNotFoundException {
+  public void runJob(String input, String output, BayesParameters params) throws IOException, InterruptedException, ClassNotFoundException {
     JobConf conf = new JobConf(CBayesDriver.class);
     Path outPath = new Path(output);
     FileSystem dfs = FileSystem.get(outPath.toUri(), conf);
@@ -71,22 +67,26 @@
 
     log.info("Reading features...");
     //Read the features in each document normalized by length of each document
-    BayesFeatureDriver.runJob(input, output, params);
+    BayesFeatureDriver feature = new BayesFeatureDriver();
+    feature.runJob(input, output, params);
 
     log.info("Calculating Tf-Idf...");
     //Calculate the TfIdf for each word in each label
-    BayesTfIdfDriver.runJob(input, output,params);
+    BayesTfIdfDriver tfidf = new BayesTfIdfDriver();
+    tfidf.runJob(input, output,params);
 
     log.info("Calculating weight sums for labels and features...");
     //Calculate the Sums of weights for each label, for each feature and for each feature and for each label
-    BayesWeightSummerDriver.runJob(input, output, params);
+    BayesWeightSummerDriver summer = new BayesWeightSummerDriver();
+    summer.runJob(input, output, params);
 
     //Calculate the W_ij = log(Theta) for each label, feature. This step actually generates the complement class
     //CBayesThetaDriver.runJob(input, output);
 
     log.info("Calculating the weight Normalisation factor for each complement class...");
     //Calculate the normalization factor Sigma_W_ij for each complement class.
-    CBayesThetaNormalizerDriver.runJob(input, output, params);
+    CBayesThetaNormalizerDriver normalizer = new CBayesThetaNormalizerDriver();
+    normalizer.runJob(input, output, params);
 
     //Calculate the normalization factor Sigma_W_ij for each complement class.
     //CBayesNormalizedWeightDriver.runJob(input, output);

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/cbayes/CBayesNormalizedWeightDriver.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/cbayes/CBayesNormalizedWeightDriver.java?rev=821385&r1=821384&r2=821385&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/cbayes/CBayesNormalizedWeightDriver.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/cbayes/CBayesNormalizedWeightDriver.java Sat Oct  3 18:23:05 2009
@@ -31,6 +31,8 @@
 import org.apache.hadoop.util.GenericsUtil;
 import org.apache.mahout.classifier.bayes.common.BayesParameters;
 import org.apache.mahout.classifier.bayes.io.SequenceFileModelReader;
+import org.apache.mahout.classifier.bayes.mapreduce.common.BayesJob;
+import org.apache.mahout.classifier.bayes.mapreduce.common.JobExecutor;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -38,25 +40,20 @@
 import java.util.Map;
 
 /** Create and run the Bayes Trainer. */
-public class CBayesNormalizedWeightDriver {
+public class CBayesNormalizedWeightDriver implements BayesJob {
 
   private static final Logger log = LoggerFactory.getLogger(CBayesNormalizedWeightDriver.class);
 
-  private CBayesNormalizedWeightDriver() {
-  }
-
   /**
    * Takes in two arguments: <ol> <li>The input {@link org.apache.hadoop.fs.Path} where the input documents live</li>
    * <li>The output {@link org.apache.hadoop.fs.Path} where to write the Model as a
    * {@link org.apache.hadoop.io.SequenceFile}</li> </ol>
    *
-   * @param args The args
+   * @param args The args - contains input and output path.
    */
-  public static void main(String[] args) throws IOException {
-    String input = args[0];
-    String output = args[1];
-
-    runJob(input, output, new BayesParameters(1));
+  public static void main(String[] args) throws Exception {
+    JobExecutor executor = new JobExecutor();
+    executor.execute(args, new CBayesNormalizedWeightDriver());
   }
 
   /**
@@ -65,7 +62,7 @@
    * @param input  the input pathname String
    * @param output the output pathname String
    */
-  public static void runJob(String input, String output, BayesParameters params) throws IOException {
+  public void runJob(String input, String output, BayesParameters params) throws IOException {
     JobClient client = new JobClient();
     JobConf conf = new JobConf(CBayesNormalizedWeightDriver.class);
 

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/cbayes/CBayesThetaDriver.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/cbayes/CBayesThetaDriver.java?rev=821385&r1=821384&r2=821385&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/cbayes/CBayesThetaDriver.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/cbayes/CBayesThetaDriver.java Sat Oct  3 18:23:05 2009
@@ -26,11 +26,14 @@
 import org.apache.hadoop.mapred.FileOutputFormat;
 import org.apache.hadoop.mapred.JobClient;
 import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.JobConfigurable;
 import org.apache.hadoop.mapred.SequenceFileInputFormat;
 import org.apache.hadoop.mapred.SequenceFileOutputFormat;
 import org.apache.hadoop.util.GenericsUtil;
 import org.apache.mahout.classifier.bayes.common.BayesParameters;
 import org.apache.mahout.classifier.bayes.io.SequenceFileModelReader;
+import org.apache.mahout.classifier.bayes.mapreduce.common.BayesJob;
+import org.apache.mahout.classifier.bayes.mapreduce.common.JobExecutor;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -38,7 +41,7 @@
 import java.util.Map;
 
 /** Create and run the Bayes Trainer. */
-public class CBayesThetaDriver {
+public class CBayesThetaDriver implements BayesJob {
 
   private static final Logger log = LoggerFactory.getLogger(CBayesThetaDriver.class);
 
@@ -50,13 +53,11 @@
    * <li>The output {@link org.apache.hadoop.fs.Path} where to write the Model as a
    * {@link org.apache.hadoop.io.SequenceFile}</li> </ol>
    *
-   * @param args The args
+   * @param args The args - input path and output path
    */
-  public static void main(String[] args) throws IOException {
-    String input = args[0];
-    String output = args[1];
-
-    runJob(input, output, new BayesParameters(1));
+  public static void main(String[] args) throws Exception {
+    JobExecutor executor = new JobExecutor();
+    executor.execute(args, new CBayesThetaDriver());
   }
 
   /**
@@ -65,7 +66,7 @@
    * @param input  the input pathname String
    * @param output the output pathname String
    */
-  public static void runJob(String input, String output, BayesParameters params) throws IOException {
+  public void runJob(String input, String output, BayesParameters params) throws IOException {
     JobClient client = new JobClient();
     JobConf conf = new JobConf(CBayesThetaDriver.class);
     conf.setJobName("Complementary Theta Driver running over input: " +  input);

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/cbayes/CBayesThetaNormalizerDriver.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/cbayes/CBayesThetaNormalizerDriver.java?rev=821385&r1=821384&r2=821385&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/cbayes/CBayesThetaNormalizerDriver.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/cbayes/CBayesThetaNormalizerDriver.java Sat Oct  3 18:23:05 2009
@@ -31,6 +31,8 @@
 import org.apache.hadoop.util.GenericsUtil;
 import org.apache.mahout.classifier.bayes.common.BayesParameters;
 import org.apache.mahout.classifier.bayes.io.SequenceFileModelReader;
+import org.apache.mahout.classifier.bayes.mapreduce.common.BayesJob;
+import org.apache.mahout.classifier.bayes.mapreduce.common.JobExecutor;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -38,25 +40,20 @@
 import java.util.Map;
 
 /** Create and run the Bayes Trainer. */
-public class CBayesThetaNormalizerDriver {
+public class CBayesThetaNormalizerDriver implements BayesJob {
 
   private static final Logger log = LoggerFactory.getLogger(CBayesThetaNormalizerDriver.class);
 
-  private CBayesThetaNormalizerDriver() {
-  }
-
   /**
    * Takes in two arguments: <ol> <li>The input {@link org.apache.hadoop.fs.Path} where the input documents live</li>
    * <li>The output {@link org.apache.hadoop.fs.Path} where to write the Model as a
    * {@link org.apache.hadoop.io.SequenceFile}</li> </ol>
    *
-   * @param args The args
+   * @param args The args - input and output path.
    */
-  public static void main(String[] args) throws IOException {
-    String input = args[0];
-    String output = args[1];
-
-    runJob(input, output, new BayesParameters(1));
+  public static void main(String[] args) throws Exception {
+    JobExecutor executor = new JobExecutor();
+    executor.execute(args, new CBayesThetaNormalizerDriver());
   }
 
   /**
@@ -65,7 +62,7 @@
    * @param input  the input pathname String
    * @param output the output pathname String
    */
-  public static void runJob(String input, String output, BayesParameters params) throws IOException {
+  public void runJob(String input, String output, BayesParameters params) throws IOException {
     JobClient client = new JobClient();
     JobConf conf = new JobConf(CBayesThetaNormalizerDriver.class);
     conf.setJobName("Complementary Bayes Theta Normalizer Driver running over input: " +  input);

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesFeatureDriver.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesFeatureDriver.java?rev=821385&r1=821384&r2=821385&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesFeatureDriver.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesFeatureDriver.java Sat Oct  3 18:23:05 2009
@@ -33,25 +33,21 @@
 import java.io.IOException;
 
 /** Create and run the Bayes Feature Reader Step. */
-public class BayesFeatureDriver {
+public class BayesFeatureDriver implements BayesJob {
 
   private static final Logger log = LoggerFactory.getLogger(BayesFeatureDriver.class);
 
-  private BayesFeatureDriver() {
-  }
-
   /**
    * Takes in two arguments: <ol> <li>The input {@link org.apache.hadoop.fs.Path} where the input documents live</li>
    * <li>The output {@link org.apache.hadoop.fs.Path} where to write the interim files as a {@link
    * org.apache.hadoop.io.SequenceFile}</li> </ol>
    *
-   * @param args The args
+   * @param args The args - input and output path.
+   * @throws Exception in case of problems during job execution.
    */
-  public static void main(String[] args) throws IOException {
-    String input = args[0];
-    String output = args[1];
-
-    runJob(input, output, new BayesParameters(1));
+  public static void main(String[] args) throws Exception {
+    JobExecutor executor = new JobExecutor();
+    executor.execute(args, new BayesFeatureDriver());
   }
 
   /**
@@ -60,7 +56,7 @@
    * @param input  the input pathname String
    * @param output the output pathname String
    */
-  public static void runJob(String input, String output, BayesParameters params) throws IOException {
+  public void runJob(String input, String output, BayesParameters params) throws IOException {
     JobClient client = new JobClient();
     JobConf conf = new JobConf(BayesFeatureDriver.class);
     conf.setJobName("Bayes Feature Driver running over input: " +  input);

Added: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesJob.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesJob.java?rev=821385&view=auto
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesJob.java (added)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesJob.java Sat Oct  3 18:23:05 2009
@@ -0,0 +1,34 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.mahout.classifier.bayes.mapreduce.common;
+
+import org.apache.mahout.classifier.bayes.common.BayesParameters;
+
+/** Implementors of this interface provide a way for running bayes training jobs on a hadoop cluster. */
+public interface BayesJob {
+
+  /**
+   * Execute this job on a cluster.
+   *
+   * @param input path to training documents.
+   * @param output path to output directory.
+   * @param params bayes parameters handed to the implementation.
+   * @throws Exception any failure raised by the implementing job.
+   */
+  void runJob(final String input, final String output, final BayesParameters params) throws Exception;
+
+}

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesTfIdfDriver.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesTfIdfDriver.java?rev=821385&r1=821384&r2=821385&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesTfIdfDriver.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesTfIdfDriver.java Sat Oct  3 18:23:05 2009
@@ -31,6 +31,7 @@
 import org.apache.hadoop.mapred.FileOutputFormat;
 import org.apache.hadoop.mapred.JobClient;
 import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.JobConfigurable;
 import org.apache.hadoop.mapred.SequenceFileInputFormat;
 import org.apache.hadoop.util.GenericsUtil;
 import org.apache.mahout.classifier.bayes.common.BayesParameters;
@@ -42,13 +43,10 @@
 import java.util.Map;
 
 /** The Driver which drives the Tf-Idf Generation */
-public class BayesTfIdfDriver {
+public class BayesTfIdfDriver implements BayesJob {
 
   private static final Logger log = LoggerFactory.getLogger(BayesTfIdfDriver.class);
 
-  private BayesTfIdfDriver() {
-  }
-
   /**
    * Takes in two arguments:
    * <ol>
@@ -58,14 +56,12 @@
    * files as a {@link org.apache.hadoop.io.SequenceFile}</li>
    * </ol>
    * 
-   * @param args The args
-   * @throws ClassNotFoundException
+   * @param args The args - input and output path.
+   * @throws Exception in case of problems during job execution.
    */
-  public static void main(String[] args) throws IOException {
-    String input = args[0];
-    String output = args[1];
-
-    runJob(input, output, new BayesParameters(1));
+  public static void main(String[] args) throws Exception {
+    JobExecutor executor = new JobExecutor();
+    executor.execute(args, new BayesTfIdfDriver());
   }
 
   /**
@@ -75,7 +71,7 @@
    * @param output the output pathname String
    * @throws ClassNotFoundException
    */
-  public static void runJob(String input, String output, BayesParameters params) throws IOException {
+  public void runJob(String input, String output, BayesParameters params) throws IOException {
 
     JobClient client = new JobClient();
     JobConf conf = new JobConf(BayesWeightSummerDriver.class);

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesWeightSummerDriver.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesWeightSummerDriver.java?rev=821385&r1=821384&r2=821385&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesWeightSummerDriver.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesWeightSummerDriver.java Sat Oct  3 18:23:05 2009
@@ -31,22 +31,18 @@
 import java.io.IOException;
 
 /** Create and run the Bayes Trainer. */
-public class BayesWeightSummerDriver {
-  private BayesWeightSummerDriver() {
-  }
+public class BayesWeightSummerDriver implements BayesJob {
 
   /**
    * Takes in two arguments: <ol> <li>The input {@link org.apache.hadoop.fs.Path} where the input documents live</li>
    * <li>The output {@link org.apache.hadoop.fs.Path} where to write the the interim files as a {@link
    * org.apache.hadoop.io.SequenceFile}</li> </ol>
    *
-   * @param args The args
+   * @param args The args - should contain input and output path.
    */
-  public static void main(String[] args) throws IOException {
-    String input = args[0];
-    String output = args[1];
-
-    runJob(input, output, new BayesParameters(1));
+  public static void main(String[] args) throws Exception {
+    JobExecutor executor = new JobExecutor();
+    executor.execute(args, new BayesWeightSummerDriver());
   }
 
   /**
@@ -55,7 +51,7 @@
    * @param input  the input pathname String
    * @param output the output pathname String
    */
-  public static void runJob(String input, String output, BayesParameters params) throws IOException {
+  public void runJob(String input, String output, BayesParameters params) throws IOException {
     JobClient client = new JobClient();
     JobConf conf = new JobConf(BayesWeightSummerDriver.class);
     conf.setJobName("Bayes Weight Summer Driver running over input: " +  input);

Added: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/JobExecutor.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/JobExecutor.java?rev=821385&view=auto
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/JobExecutor.java (added)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/JobExecutor.java Sat Oct  3 18:23:05 2009
@@ -0,0 +1,77 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.mahout.classifier.bayes.mapreduce.common;
+
+import org.apache.commons.cli2.CommandLine;
+import org.apache.commons.cli2.Group;
+import org.apache.commons.cli2.Option;
+import org.apache.commons.cli2.OptionException;
+import org.apache.commons.cli2.builder.ArgumentBuilder;
+import org.apache.commons.cli2.builder.DefaultOptionBuilder;
+import org.apache.commons.cli2.builder.GroupBuilder;
+import org.apache.commons.cli2.commandline.Parser;
+import org.apache.mahout.classifier.bayes.common.BayesParameters;
+import org.apache.mahout.classifier.bayes.mapreduce.bayes.BayesDriver;
+import org.apache.mahout.common.CommandLineUtil;
+import org.apache.mahout.common.commandline.DefaultOptionCreator;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/** Command line front end that parses input/output paths and hands them to a {@link BayesJob}. */
+public class JobExecutor {
+  /** Logger for this class. */
+  private static final Logger log = LoggerFactory.getLogger(JobExecutor.class);
+
+  /**
+   * Execute a bayes classification job. Input and output path are parsed from
+   * the input parameters. When help is requested or option parsing fails, usage
+   * is printed and the job is not run.
+   * @param args input parameters.
+   * @param job the job to execute; it is run with {@code new BayesParameters(1)}.
+   * @throws Exception any exception thrown at job execution.
+   */
+  public void execute(final String[] args, final BayesJob job) throws Exception {
+    DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
+    ArgumentBuilder abuilder = new ArgumentBuilder();
+    GroupBuilder gbuilder = new GroupBuilder();
+
+    Option inputOpt = DefaultOptionCreator.inputOption(obuilder, abuilder);
+    Option outputOpt = DefaultOptionCreator.outputOption(obuilder, abuilder);
+    Option helpOpt = DefaultOptionCreator.helpOption(obuilder);
+
+    Group group = gbuilder.withName("Options").withOption(inputOpt).withOption(outputOpt)
+        .withOption(helpOpt).create();
+
+    try {
+      Parser parser = new Parser();
+      parser.setGroup(group);
+      CommandLine cmdLine = parser.parse(args);
+
+      if (cmdLine.hasOption(helpOpt)) {
+        CommandLineUtil.printHelp(group);
+        return;
+      }
+      String input = cmdLine.getValue(inputOpt).toString();
+      String output = cmdLine.getValue(outputOpt).toString();
+      job.runJob(input, output, new BayesParameters(1));
+    } catch (OptionException e) {
+      // Option errors are reported together with the usage text rather than rethrown.
+      log.error("Exception", e);
+      CommandLineUtil.printHelp(group);
+    }
+  }
+}

Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/TrainClassifier.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/TrainClassifier.java?rev=821385&r1=821384&r2=821385&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/TrainClassifier.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/TrainClassifier.java Sat Oct  3 18:23:05 2009
@@ -61,11 +61,13 @@
   }
 
   public static void trainNaiveBayes(String dir, String outputDir, BayesParameters params) throws IOException, InterruptedException, ClassNotFoundException {
-    BayesDriver.runJob(dir, outputDir, params);
+    BayesDriver driver = new BayesDriver();
+    driver.runJob(dir, outputDir, params);
   }
   
   public static void trainCNaiveBayes(String dir, String outputDir, BayesParameters params) throws IOException, InterruptedException, ClassNotFoundException {
-    CBayesDriver.runJob(dir, outputDir, params);
+    CBayesDriver driver = new CBayesDriver();
+    driver.runJob(dir, outputDir, params);
   }
 
   public static void main(String[] args) throws IOException, OptionException, NumberFormatException, IllegalStateException, InterruptedException, ClassNotFoundException {