You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by ro...@apache.org on 2012/06/04 05:42:32 UTC

svn commit: r1345821 - in /mahout/trunk: core/src/main/java/org/apache/mahout/classifier/naivebayes/ core/src/main/java/org/apache/mahout/classifier/naivebayes/training/ core/src/test/java/org/apache/mahout/classifier/naivebayes/ core/src/test/java/org...

Author: robinanil
Date: Mon Jun  4 03:42:31 2012
New Revision: 1345821

URL: http://svn.apache.org/viewvc?rev=1345821&view=rev
Log:
MAHOUT-1006 Fixes tests to use the new format and disables the theta training phase for now. Some code cleanup.

Modified:
    mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/AbstractNaiveBayesClassifier.java
    mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/BayesUtils.java
    mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/ComplementaryNaiveBayesClassifier.java
    mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/NaiveBayesModel.java
    mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/StandardNaiveBayesClassifier.java
    mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/training/ComplementaryThetaTrainer.java
    mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/training/StandardThetaTrainer.java
    mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/training/TrainNaiveBayesJob.java
    mahout/trunk/core/src/test/java/org/apache/mahout/classifier/naivebayes/NaiveBayesTest.java
    mahout/trunk/core/src/test/java/org/apache/mahout/classifier/naivebayes/training/IndexInstancesMapperTest.java
    mahout/trunk/examples/bin/classify-20newsgroups.sh

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/AbstractNaiveBayesClassifier.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/AbstractNaiveBayesClassifier.java?rev=1345821&r1=1345820&r2=1345821&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/AbstractNaiveBayesClassifier.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/AbstractNaiveBayesClassifier.java Mon Jun  4 03:42:31 2012
@@ -45,7 +45,7 @@ public abstract class AbstractNaiveBayes
       Element e = elements.next();
       result += e.get() * getScoreForLabelFeature(label, e.index());
     }
-    return -result;
+    return result;
   }
   
   @Override

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/BayesUtils.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/BayesUtils.java?rev=1345821&r1=1345820&r2=1345821&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/BayesUtils.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/BayesUtils.java Mon Jun  4 03:42:31 2012
@@ -78,16 +78,16 @@ public final class BayesUtils {
       scoresPerLabelAndFeature.assignRow(entry.getFirst().get(), entry.getSecond().get());
     }
 
-    Vector perlabelThetaNormalizer = null;
-    for (Pair<Text,VectorWritable> entry : new SequenceFileDirIterable<Text,VectorWritable>(
+    Vector perlabelThetaNormalizer = scoresPerLabel.like();
+    /* for (Pair<Text,VectorWritable> entry : new SequenceFileDirIterable<Text,VectorWritable>(
         new Path(base, TrainNaiveBayesJob.THETAS), PathType.LIST, PathFilters.partFilter(), conf)) {
       if (entry.getFirst().toString().equals(TrainNaiveBayesJob.LABEL_THETA_NORMALIZER)) {
         perlabelThetaNormalizer = entry.getSecond().get();
       }
-    }
+    } 
 
     Preconditions.checkNotNull(perlabelThetaNormalizer);
-
+    */
     return new NaiveBayesModel(scoresPerLabelAndFeature, scoresPerFeature, scoresPerLabel, perlabelThetaNormalizer,
         alphaI);
   }

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/ComplementaryNaiveBayesClassifier.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/ComplementaryNaiveBayesClassifier.java?rev=1345821&r1=1345820&r2=1345821&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/ComplementaryNaiveBayesClassifier.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/ComplementaryNaiveBayesClassifier.java Mon Jun  4 03:42:31 2012
@@ -31,8 +31,14 @@ public class ComplementaryNaiveBayesClas
   @Override
   public double getScoreForLabelFeature(int label, int feature) {
     NaiveBayesModel model = getModel();
-    double numerator = model.featureWeight(feature) - model.weight(label, feature) + model.alphaI();
-    double denominator =  model.totalWeightSum() - model.labelWeight(label) + model.alphaI() * model.numFeatures();
-    return Math.log(numerator / denominator);
+    return computeWeight(model.featureWeight(feature), model.weight(label, feature),
+        model.totalWeightSum(), model.labelWeight(label), model.alphaI(), model.numFeatures());
+  }
+
+  public static double computeWeight(double featureWeight, double featureLabelWeight,
+      double totalWeight, double labelWeight, double alphaI, double numFeatures) {
+    double numerator = featureWeight - featureLabelWeight + alphaI;
+    double denominator = totalWeight - labelWeight + alphaI * numFeatures;
+    return -Math.log(numerator / denominator);
   }
 }

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/NaiveBayesModel.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/NaiveBayesModel.java?rev=1345821&r1=1345820&r2=1345821&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/NaiveBayesModel.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/NaiveBayesModel.java Mon Jun  4 03:42:31 2012
@@ -38,6 +38,7 @@ public class NaiveBayesModel {
 
   private final Vector weightsPerLabel;
   private final Vector perlabelThetaNormalizer;
+  private final double minThetaNormalizer;
   private final Vector weightsPerFeature;
   private final Matrix weightsPerLabelAndFeature;
   private final float alphaI;
@@ -56,6 +57,7 @@ public class NaiveBayesModel {
     this.numFeatures = weightsPerFeature.getNumNondefaultElements();
     this.totalWeightSum = weightsPerLabel.zSum();
     this.alphaI = alphaI;
+    this.minThetaNormalizer = thetaNormalizer.maxValue();
   }
 
   public double labelWeight(int label) {
@@ -63,7 +65,7 @@ public class NaiveBayesModel {
   }
 
   public double thetaNormalizer(int label) {
-    return perlabelThetaNormalizer.get(label);
+    return perlabelThetaNormalizer.get(label) / minThetaNormalizer;
   }
 
   public double featureWeight(int feature) {
@@ -147,10 +149,17 @@ public class NaiveBayesModel {
     Preconditions.checkArgument(weightsPerLabel.getNumNondefaultElements() > 0,
         "the number of labels has to be greater than 0!");
     Preconditions.checkArgument(perlabelThetaNormalizer != null, "the theta normalizers have to be defined");
-    Preconditions.checkArgument(perlabelThetaNormalizer.getNumNondefaultElements() > 0,
-        "the number of theta normalizers has to be greater than 0!");
+    // Preconditions.checkArgument(perlabelThetaNormalizer.getNumNondefaultElements() > 0,
+    //    "the number of theta normalizers has to be greater than 0!");
     Preconditions.checkArgument(weightsPerFeature != null, "the feature sums have to be defined");
     Preconditions.checkArgument(weightsPerFeature.getNumNondefaultElements() > 0,
         "the feature sums have to be greater than 0!");
+    // Check if all thetas have same sign.
+    /*Iterator<Element> it = perlabelThetaNormalizer.iterateNonZero();
+    while (it.hasNext()) {
+      Element e = it.next();
+      Preconditions.checkArgument(Math.signum(e.get()) == Math.signum(minThetaNormalizer), e.get()
+          + "  " + minThetaNormalizer);
+    }*/
   }
 }

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/StandardNaiveBayesClassifier.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/StandardNaiveBayesClassifier.java?rev=1345821&r1=1345820&r2=1345821&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/StandardNaiveBayesClassifier.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/StandardNaiveBayesClassifier.java Mon Jun  4 03:42:31 2012
@@ -28,10 +28,14 @@ public class StandardNaiveBayesClassifie
   @Override
   public double getScoreForLabelFeature(int label, int feature) {
     NaiveBayesModel model = getModel();
-    double numerator = model.weight(label, feature) + model.alphaI();
-    double denominator = model.labelWeight(label) + model.alphaI() * model.numFeatures();
+    return computeWeight(model.weight(label, feature), model.labelWeight(label), model.alphaI(),
+        model.numFeatures());
+  }
 
-    return -Math.log(numerator / denominator);
+  public static double computeWeight(double featureLabelWeight, double labelWeight, double alphaI,
+      double numFeatures) {
+    double numerator = featureLabelWeight + alphaI;
+    double denominator = labelWeight + alphaI * numFeatures;
+    return Math.log(numerator / denominator);
   }
-  
 }

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/training/ComplementaryThetaTrainer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/training/ComplementaryThetaTrainer.java?rev=1345821&r1=1345820&r2=1345821&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/training/ComplementaryThetaTrainer.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/training/ComplementaryThetaTrainer.java Mon Jun  4 03:42:31 2012
@@ -17,10 +17,11 @@
 
 package org.apache.mahout.classifier.naivebayes.training;
 
-import org.apache.mahout.math.Vector;
-
 import java.util.Iterator;
 
+import org.apache.mahout.classifier.naivebayes.ComplementaryNaiveBayesClassifier;
+import org.apache.mahout.math.Vector;
+
 public class ComplementaryThetaTrainer extends AbstractThetaTrainer {
 
   public ComplementaryThetaTrainer(Vector weightsPerFeature, Vector weightsPerLabel, double alphaI) {
@@ -29,14 +30,13 @@ public class ComplementaryThetaTrainer e
 
   @Override
   public void train(int label, Vector perLabelWeight) {
-    double sigmaK = labelWeight(label);
+    double labelWeight = labelWeight(label);
     Iterator<Vector.Element> it = perLabelWeight.iterateNonZero();
     while (it.hasNext()) {
       Vector.Element e = it.next();
-      double numerator = featureWeight(e.index()) - e.get() + alphaI();
-      double denominator = totalWeightSum() - sigmaK + numFeatures() ;
-      double weight = Math.log(numerator / denominator);
-      updatePerLabelThetaNormalizer(label, weight);
+      updatePerLabelThetaNormalizer(label,
+          ComplementaryNaiveBayesClassifier.computeWeight(featureWeight(e.index()), e.get(),
+              totalWeightSum(), labelWeight, alphaI(), numFeatures()));
     }
   }
 }

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/training/StandardThetaTrainer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/training/StandardThetaTrainer.java?rev=1345821&r1=1345820&r2=1345821&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/training/StandardThetaTrainer.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/training/StandardThetaTrainer.java Mon Jun  4 03:42:31 2012
@@ -19,6 +19,7 @@
 
 import java.util.Iterator;
 
+import org.apache.mahout.classifier.naivebayes.StandardNaiveBayesClassifier;
 import org.apache.mahout.math.Vector;
 
 public class StandardThetaTrainer extends AbstractThetaTrainer {
@@ -29,14 +30,12 @@ public class StandardThetaTrainer extend
 
   @Override
   public void train(int label, Vector perLabelWeight) {
-    double sigmaK = labelWeight(label);
+    double labelWeight = labelWeight(label);
     Iterator<Vector.Element> it = perLabelWeight.iterateNonZero();
     while (it.hasNext()) {
       Vector.Element e = it.next();
-      double numerator = e.get() + alphaI();
-      double denominator = sigmaK + numFeatures();
-      double weight = Math.log(numerator / denominator);
-      updatePerLabelThetaNormalizer(label, weight);
+      updatePerLabelThetaNormalizer(label,
+          StandardNaiveBayesClassifier.computeWeight(e.get(), labelWeight, alphaI(), numFeatures()));
     }
   }
 }

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/training/TrainNaiveBayesJob.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/training/TrainNaiveBayesJob.java?rev=1345821&r1=1345820&r2=1345821&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/training/TrainNaiveBayesJob.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/training/TrainNaiveBayesJob.java Mon Jun  4 03:42:31 2012
@@ -45,7 +45,11 @@ import java.util.Map;
  * This class trains a Naive Bayes Classifier (Parameters for both Naive Bayes and Complementary Naive Bayes)
  */
 public final class TrainNaiveBayesJob extends AbstractJob {
-
+  private static final String TRAIN_COMPLEMENTARY = "trainComplementary";
+  private static final String ALPHA_I = "alphaI";
+  private static final String LABEL_INDEX = "labelIndex";
+  private static final String EXTRACT_LABELS = "extractLabels";
+  private static final String LABELS = "labels";
   public static final String WEIGHTS_PER_FEATURE = "__SPF";
   public static final String WEIGHTS_PER_LABEL = "__SPL";
   public static final String LABEL_THETA_NORMALIZER = "_LTN";
@@ -63,12 +67,12 @@ public final class TrainNaiveBayesJob ex
 
     addInputOption();
     addOutputOption();
-    addOption("labels", "l", "comma-separated list of labels to include in training", false);
+    addOption(LABELS, "l", "comma-separated list of labels to include in training", false);
 
-    addOption(buildOption("extractLabels", "el", "Extract the labels from the input", false, false, ""));
-    addOption("alphaI", "a", "smoothing parameter", String.valueOf(1.0f));
-    addOption(buildOption("trainComplementary", "c", "train complementary?", false, false, String.valueOf(false)));
-    addOption("labelIndex", "li", "The path to store the label index in", false);
+    addOption(buildOption(EXTRACT_LABELS, "el", "Extract the labels from the input", false, false, ""));
+    addOption(ALPHA_I, "a", "smoothing parameter", String.valueOf(1.0f));
+    addOption(buildOption(TRAIN_COMPLEMENTARY, "c", "train complementary?", false, false, String.valueOf(false)));
+    addOption(LABEL_INDEX, "li", "The path to store the label index in", false);
     addOption(DefaultOptionCreator.overwriteOption().create());
     Map<String, List<String>> parsedArgs = parseArguments(args);
     if (parsedArgs == null) {
@@ -79,15 +83,15 @@ public final class TrainNaiveBayesJob ex
       HadoopUtil.delete(getConf(), getTempPath());
     }
     Path labPath;
-    String labPathStr = getOption("labelIndex");
+    String labPathStr = getOption(LABEL_INDEX);
     if (labPathStr != null) {
       labPath = new Path(labPathStr);
     } else {
-      labPath = getTempPath("labelIndex");
+      labPath = getTempPath(LABEL_INDEX);
     }
     long labelSize = createLabelIndex(labPath);
-    float alphaI = Float.parseFloat(getOption("alphaI"));
-    boolean trainComplementary = Boolean.parseBoolean(getOption("trainComplementary"));
+    float alphaI = Float.parseFloat(getOption(ALPHA_I));
+    boolean trainComplementary = Boolean.parseBoolean(getOption(TRAIN_COMPLEMENTARY));
 
 
     HadoopUtil.setSerializations(getConf());
@@ -123,10 +127,11 @@ public final class TrainNaiveBayesJob ex
     thetaSummer.setCombinerClass(VectorSumReducer.class);
     thetaSummer.getConfiguration().setFloat(ThetaMapper.ALPHA_I, alphaI);
     thetaSummer.getConfiguration().setBoolean(ThetaMapper.TRAIN_COMPLEMENTARY, trainComplementary);
+    /* TODO(robinanil): Enable this when thetanormalization works.
     succeeded = thetaSummer.waitForCompletion(true);
     if (!succeeded) {
       return -1;
-    }
+    }*/
     
     //validate our model and then write it out to the official output
     NaiveBayesModel naiveBayesModel = BayesUtils.readModelFromDir(getTempPath(), getConf());
@@ -138,10 +143,10 @@ public final class TrainNaiveBayesJob ex
 
   private long createLabelIndex(Path labPath) throws IOException {
     long labelSize = 0;
-    if (hasOption("labels")) {
-      Iterable<String> labels = Splitter.on(",").split(getOption("labels"));
+    if (hasOption(LABELS)) {
+      Iterable<String> labels = Splitter.on(",").split(getOption(LABELS));
       labelSize = BayesUtils.writeLabelIndex(getConf(), labels, labPath);
-    } else if (hasOption("extractLabels")) {
+    } else if (hasOption(EXTRACT_LABELS)) {
       SequenceFileDirIterable<Text, IntWritable> iterable =
               new SequenceFileDirIterable<Text, IntWritable>(getInputPath(), PathType.LIST, PathFilters.logsCRCFilter(), getConf());
       labelSize = BayesUtils.writeLabelIndex(getConf(), labPath, iterable);

Modified: mahout/trunk/core/src/test/java/org/apache/mahout/classifier/naivebayes/NaiveBayesTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/classifier/naivebayes/NaiveBayesTest.java?rev=1345821&r1=1345820&r2=1345821&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/classifier/naivebayes/NaiveBayesTest.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/classifier/naivebayes/NaiveBayesTest.java Mon Jun  4 03:42:31 2012
@@ -42,8 +42,8 @@ public class NaiveBayesTest extends Maho
   private File outputDir;
   private File tempDir;
 
-  static final Text LABEL_STOLEN = new Text("stolen");
-  static final Text LABEL_NOT_STOLEN = new Text("not_stolen");
+  static final Text LABEL_STOLEN = new Text("/stolen/");
+  static final Text LABEL_NOT_STOLEN = new Text("/not_stolen/");
 
   static final Vector.Element COLOR_RED = MathHelper.elem(0, 1);
   static final Vector.Element COLOR_YELLOW = MathHelper.elem(1, 1);
@@ -69,16 +69,16 @@ public class NaiveBayesTest extends Maho
         new Path(inputFile.getAbsolutePath()), Text.class, VectorWritable.class);
 
     try {
-      writer.append(LABEL_STOLEN,      trainingInstance(COLOR_RED, TYPE_SPORTS, ORIGIN_DOMESTIC));
+      writer.append(LABEL_STOLEN, trainingInstance(COLOR_RED, TYPE_SPORTS, ORIGIN_DOMESTIC));
       writer.append(LABEL_NOT_STOLEN, trainingInstance(COLOR_RED, TYPE_SPORTS, ORIGIN_DOMESTIC));
-      writer.append(LABEL_STOLEN,      trainingInstance(COLOR_RED, TYPE_SPORTS, ORIGIN_DOMESTIC));
+      writer.append(LABEL_STOLEN, trainingInstance(COLOR_RED, TYPE_SPORTS, ORIGIN_DOMESTIC));
       writer.append(LABEL_NOT_STOLEN, trainingInstance(COLOR_YELLOW, TYPE_SPORTS, ORIGIN_DOMESTIC));
-      writer.append(LABEL_STOLEN,      trainingInstance(COLOR_YELLOW, TYPE_SPORTS, ORIGIN_IMPORTED));
+      writer.append(LABEL_STOLEN, trainingInstance(COLOR_YELLOW, TYPE_SPORTS, ORIGIN_IMPORTED));
       writer.append(LABEL_NOT_STOLEN, trainingInstance(COLOR_YELLOW, TYPE_SUV, ORIGIN_IMPORTED));
-      writer.append(LABEL_STOLEN,      trainingInstance(COLOR_YELLOW, TYPE_SUV, ORIGIN_IMPORTED));
+      writer.append(LABEL_STOLEN, trainingInstance(COLOR_YELLOW, TYPE_SUV, ORIGIN_IMPORTED));
       writer.append(LABEL_NOT_STOLEN, trainingInstance(COLOR_YELLOW, TYPE_SUV, ORIGIN_DOMESTIC));
       writer.append(LABEL_NOT_STOLEN, trainingInstance(COLOR_RED, TYPE_SUV, ORIGIN_IMPORTED));
-      writer.append(LABEL_STOLEN,      trainingInstance(COLOR_RED, TYPE_SPORTS, ORIGIN_IMPORTED));
+      writer.append(LABEL_STOLEN, trainingInstance(COLOR_RED, TYPE_SPORTS, ORIGIN_IMPORTED));
     } finally {
       Closeables.closeQuietly(writer);
     }
@@ -89,7 +89,7 @@ public class NaiveBayesTest extends Maho
     TrainNaiveBayesJob trainNaiveBayes = new TrainNaiveBayesJob();
     trainNaiveBayes.setConf(conf);
     trainNaiveBayes.run(new String[] { "--input", inputFile.getAbsolutePath(), "--output", outputDir.getAbsolutePath(),
-        "--labels", "stolen,not_stolen", "--tempDir", tempDir.getAbsolutePath() });
+        "-el", "--tempDir", tempDir.getAbsolutePath() });
 
     NaiveBayesModel naiveBayesModel = NaiveBayesModel.materialize(new Path(outputDir.getAbsolutePath()), conf);
 
@@ -108,7 +108,7 @@ public class NaiveBayesTest extends Maho
     TrainNaiveBayesJob trainNaiveBayes = new TrainNaiveBayesJob();
     trainNaiveBayes.setConf(conf);
     trainNaiveBayes.run(new String[] { "--input", inputFile.getAbsolutePath(), "--output", outputDir.getAbsolutePath(),
-        "--labels", "stolen,not_stolen", "--trainComplementary",
+        "-el", "--trainComplementary",
         "--tempDir", tempDir.getAbsolutePath() });
 
     NaiveBayesModel naiveBayesModel = NaiveBayesModel.materialize(new Path(outputDir.getAbsolutePath()), conf);

Modified: mahout/trunk/core/src/test/java/org/apache/mahout/classifier/naivebayes/training/IndexInstancesMapperTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/classifier/naivebayes/training/IndexInstancesMapperTest.java?rev=1345821&r1=1345820&r2=1345821&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/classifier/naivebayes/training/IndexInstancesMapperTest.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/classifier/naivebayes/training/IndexInstancesMapperTest.java Mon Jun  4 03:42:31 2012
@@ -59,7 +59,7 @@ public class IndexInstancesMapperTest ex
     IndexInstancesMapper indexInstances = new IndexInstancesMapper();
     setField(indexInstances, "labelIndex", labelIndex);
 
-    indexInstances.map(new Text("bird"), instance, ctx);
+    indexInstances.map(new Text("/bird/"), instance, ctx);
 
     EasyMock.verify(ctx);
   }
@@ -77,7 +77,7 @@ public class IndexInstancesMapperTest ex
     IndexInstancesMapper indexInstances = new IndexInstancesMapper();
     setField(indexInstances, "labelIndex", labelIndex);
 
-    indexInstances.map(new Text("fish"), instance, ctx);
+    indexInstances.map(new Text("/fish/"), instance, ctx);
 
     EasyMock.verify(ctx, skippedInstances);
   }

Modified: mahout/trunk/examples/bin/classify-20newsgroups.sh
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/bin/classify-20newsgroups.sh?rev=1345821&r1=1345820&r2=1345821&view=diff
==============================================================================
--- mahout/trunk/examples/bin/classify-20newsgroups.sh (original)
+++ mahout/trunk/examples/bin/classify-20newsgroups.sh Mon Jun  4 03:42:31 2012
@@ -97,7 +97,7 @@ if [ "x$alg" == "xnaivebayes"  -o  "x$al
     -i ${WORK_DIR}/20news-vectors/tfidf-vectors \
     --trainingOutput ${WORK_DIR}/20news-train-vectors \
     --testOutput ${WORK_DIR}/20news-test-vectors  \
-    --randomSelectionPct 20 --overwrite --sequenceFiles -xm sequential
+    --randomSelectionPct 40 --overwrite --sequenceFiles -xm sequential
 
   echo "Training Naive Bayes model"
   ./bin/mahout trainnb \