You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by ro...@apache.org on 2012/06/04 05:42:32 UTC
svn commit: r1345821 - in /mahout/trunk:
core/src/main/java/org/apache/mahout/classifier/naivebayes/
core/src/main/java/org/apache/mahout/classifier/naivebayes/training/
core/src/test/java/org/apache/mahout/classifier/naivebayes/
core/src/test/java/org...
Author: robinanil
Date: Mon Jun 4 03:42:31 2012
New Revision: 1345821
URL: http://svn.apache.org/viewvc?rev=1345821&view=rev
Log:
MAHOUT-1006: Fixes tests to use the new label format and disables the theta training phase for now. Also includes some code cleanup.
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/AbstractNaiveBayesClassifier.java
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/BayesUtils.java
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/ComplementaryNaiveBayesClassifier.java
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/NaiveBayesModel.java
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/StandardNaiveBayesClassifier.java
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/training/ComplementaryThetaTrainer.java
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/training/StandardThetaTrainer.java
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/training/TrainNaiveBayesJob.java
mahout/trunk/core/src/test/java/org/apache/mahout/classifier/naivebayes/NaiveBayesTest.java
mahout/trunk/core/src/test/java/org/apache/mahout/classifier/naivebayes/training/IndexInstancesMapperTest.java
mahout/trunk/examples/bin/classify-20newsgroups.sh
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/AbstractNaiveBayesClassifier.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/AbstractNaiveBayesClassifier.java?rev=1345821&r1=1345820&r2=1345821&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/AbstractNaiveBayesClassifier.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/AbstractNaiveBayesClassifier.java Mon Jun 4 03:42:31 2012
@@ -45,7 +45,7 @@ public abstract class AbstractNaiveBayes
Element e = elements.next();
result += e.get() * getScoreForLabelFeature(label, e.index());
}
- return -result;
+ return result;
}
@Override
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/BayesUtils.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/BayesUtils.java?rev=1345821&r1=1345820&r2=1345821&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/BayesUtils.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/BayesUtils.java Mon Jun 4 03:42:31 2012
@@ -78,16 +78,16 @@ public final class BayesUtils {
scoresPerLabelAndFeature.assignRow(entry.getFirst().get(), entry.getSecond().get());
}
- Vector perlabelThetaNormalizer = null;
- for (Pair<Text,VectorWritable> entry : new SequenceFileDirIterable<Text,VectorWritable>(
+ Vector perlabelThetaNormalizer = scoresPerLabel.like();
+ /* for (Pair<Text,VectorWritable> entry : new SequenceFileDirIterable<Text,VectorWritable>(
new Path(base, TrainNaiveBayesJob.THETAS), PathType.LIST, PathFilters.partFilter(), conf)) {
if (entry.getFirst().toString().equals(TrainNaiveBayesJob.LABEL_THETA_NORMALIZER)) {
perlabelThetaNormalizer = entry.getSecond().get();
}
- }
+ }
Preconditions.checkNotNull(perlabelThetaNormalizer);
-
+ */
return new NaiveBayesModel(scoresPerLabelAndFeature, scoresPerFeature, scoresPerLabel, perlabelThetaNormalizer,
alphaI);
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/ComplementaryNaiveBayesClassifier.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/ComplementaryNaiveBayesClassifier.java?rev=1345821&r1=1345820&r2=1345821&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/ComplementaryNaiveBayesClassifier.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/ComplementaryNaiveBayesClassifier.java Mon Jun 4 03:42:31 2012
@@ -31,8 +31,14 @@ public class ComplementaryNaiveBayesClas
@Override
public double getScoreForLabelFeature(int label, int feature) {
NaiveBayesModel model = getModel();
- double numerator = model.featureWeight(feature) - model.weight(label, feature) + model.alphaI();
- double denominator = model.totalWeightSum() - model.labelWeight(label) + model.alphaI() * model.numFeatures();
- return Math.log(numerator / denominator);
+ return computeWeight(model.featureWeight(feature), model.weight(label, feature),
+ model.totalWeightSum(), model.labelWeight(label), model.alphaI(), model.numFeatures());
+ }
+
+ public static double computeWeight(double featureWeight, double featureLabelWeight,
+ double totalWeight, double labelWeight, double alphaI, double numFeatures) {
+ double numerator = featureWeight - featureLabelWeight + alphaI;
+ double denominator = totalWeight - labelWeight + alphaI * numFeatures;
+ return -Math.log(numerator / denominator);
}
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/NaiveBayesModel.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/NaiveBayesModel.java?rev=1345821&r1=1345820&r2=1345821&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/NaiveBayesModel.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/NaiveBayesModel.java Mon Jun 4 03:42:31 2012
@@ -38,6 +38,7 @@ public class NaiveBayesModel {
private final Vector weightsPerLabel;
private final Vector perlabelThetaNormalizer;
+ private final double minThetaNormalizer;
private final Vector weightsPerFeature;
private final Matrix weightsPerLabelAndFeature;
private final float alphaI;
@@ -56,6 +57,7 @@ public class NaiveBayesModel {
this.numFeatures = weightsPerFeature.getNumNondefaultElements();
this.totalWeightSum = weightsPerLabel.zSum();
this.alphaI = alphaI;
+ this.minThetaNormalizer = thetaNormalizer.maxValue();
}
public double labelWeight(int label) {
@@ -63,7 +65,7 @@ public class NaiveBayesModel {
}
public double thetaNormalizer(int label) {
- return perlabelThetaNormalizer.get(label);
+ return perlabelThetaNormalizer.get(label) / minThetaNormalizer;
}
public double featureWeight(int feature) {
@@ -147,10 +149,17 @@ public class NaiveBayesModel {
Preconditions.checkArgument(weightsPerLabel.getNumNondefaultElements() > 0,
"the number of labels has to be greater than 0!");
Preconditions.checkArgument(perlabelThetaNormalizer != null, "the theta normalizers have to be defined");
- Preconditions.checkArgument(perlabelThetaNormalizer.getNumNondefaultElements() > 0,
- "the number of theta normalizers has to be greater than 0!");
+ // Preconditions.checkArgument(perlabelThetaNormalizer.getNumNondefaultElements() > 0,
+ // "the number of theta normalizers has to be greater than 0!");
Preconditions.checkArgument(weightsPerFeature != null, "the feature sums have to be defined");
Preconditions.checkArgument(weightsPerFeature.getNumNondefaultElements() > 0,
"the feature sums have to be greater than 0!");
+ // Check if all thetas have same sign.
+ /*Iterator<Element> it = perlabelThetaNormalizer.iterateNonZero();
+ while (it.hasNext()) {
+ Element e = it.next();
+ Preconditions.checkArgument(Math.signum(e.get()) == Math.signum(minThetaNormalizer), e.get()
+ + " " + minThetaNormalizer);
+ }*/
}
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/StandardNaiveBayesClassifier.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/StandardNaiveBayesClassifier.java?rev=1345821&r1=1345820&r2=1345821&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/StandardNaiveBayesClassifier.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/StandardNaiveBayesClassifier.java Mon Jun 4 03:42:31 2012
@@ -28,10 +28,14 @@ public class StandardNaiveBayesClassifie
@Override
public double getScoreForLabelFeature(int label, int feature) {
NaiveBayesModel model = getModel();
- double numerator = model.weight(label, feature) + model.alphaI();
- double denominator = model.labelWeight(label) + model.alphaI() * model.numFeatures();
+ return computeWeight(model.weight(label, feature), model.labelWeight(label), model.alphaI(),
+ model.numFeatures());
+ }
- return -Math.log(numerator / denominator);
+ public static double computeWeight(double featureLabelWeight, double labelWeight, double alphaI,
+ double numFeatures) {
+ double numerator = featureLabelWeight + alphaI;
+ double denominator = labelWeight + alphaI * numFeatures;
+ return Math.log(numerator / denominator);
}
-
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/training/ComplementaryThetaTrainer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/training/ComplementaryThetaTrainer.java?rev=1345821&r1=1345820&r2=1345821&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/training/ComplementaryThetaTrainer.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/training/ComplementaryThetaTrainer.java Mon Jun 4 03:42:31 2012
@@ -17,10 +17,11 @@
package org.apache.mahout.classifier.naivebayes.training;
-import org.apache.mahout.math.Vector;
-
import java.util.Iterator;
+import org.apache.mahout.classifier.naivebayes.ComplementaryNaiveBayesClassifier;
+import org.apache.mahout.math.Vector;
+
public class ComplementaryThetaTrainer extends AbstractThetaTrainer {
public ComplementaryThetaTrainer(Vector weightsPerFeature, Vector weightsPerLabel, double alphaI) {
@@ -29,14 +30,13 @@ public class ComplementaryThetaTrainer e
@Override
public void train(int label, Vector perLabelWeight) {
- double sigmaK = labelWeight(label);
+ double labelWeight = labelWeight(label);
Iterator<Vector.Element> it = perLabelWeight.iterateNonZero();
while (it.hasNext()) {
Vector.Element e = it.next();
- double numerator = featureWeight(e.index()) - e.get() + alphaI();
- double denominator = totalWeightSum() - sigmaK + numFeatures() ;
- double weight = Math.log(numerator / denominator);
- updatePerLabelThetaNormalizer(label, weight);
+ updatePerLabelThetaNormalizer(label,
+ ComplementaryNaiveBayesClassifier.computeWeight(featureWeight(e.index()), e.get(),
+ totalWeightSum(), labelWeight, alphaI(), numFeatures()));
}
}
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/training/StandardThetaTrainer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/training/StandardThetaTrainer.java?rev=1345821&r1=1345820&r2=1345821&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/training/StandardThetaTrainer.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/training/StandardThetaTrainer.java Mon Jun 4 03:42:31 2012
@@ -19,6 +19,7 @@
import java.util.Iterator;
+import org.apache.mahout.classifier.naivebayes.StandardNaiveBayesClassifier;
import org.apache.mahout.math.Vector;
public class StandardThetaTrainer extends AbstractThetaTrainer {
@@ -29,14 +30,12 @@ public class StandardThetaTrainer extend
@Override
public void train(int label, Vector perLabelWeight) {
- double sigmaK = labelWeight(label);
+ double labelWeight = labelWeight(label);
Iterator<Vector.Element> it = perLabelWeight.iterateNonZero();
while (it.hasNext()) {
Vector.Element e = it.next();
- double numerator = e.get() + alphaI();
- double denominator = sigmaK + numFeatures();
- double weight = Math.log(numerator / denominator);
- updatePerLabelThetaNormalizer(label, weight);
+ updatePerLabelThetaNormalizer(label,
+ StandardNaiveBayesClassifier.computeWeight(e.get(), labelWeight, alphaI(), numFeatures()));
}
}
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/training/TrainNaiveBayesJob.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/training/TrainNaiveBayesJob.java?rev=1345821&r1=1345820&r2=1345821&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/training/TrainNaiveBayesJob.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/training/TrainNaiveBayesJob.java Mon Jun 4 03:42:31 2012
@@ -45,7 +45,11 @@ import java.util.Map;
* This class trains a Naive Bayes Classifier (Parameters for both Naive Bayes and Complementary Naive Bayes)
*/
public final class TrainNaiveBayesJob extends AbstractJob {
-
+ private static final String TRAIN_COMPLEMENTARY = "trainComplementary";
+ private static final String ALPHA_I = "alphaI";
+ private static final String LABEL_INDEX = "labelIndex";
+ private static final String EXTRACT_LABELS = "extractLabels";
+ private static final String LABELS = "labels";
public static final String WEIGHTS_PER_FEATURE = "__SPF";
public static final String WEIGHTS_PER_LABEL = "__SPL";
public static final String LABEL_THETA_NORMALIZER = "_LTN";
@@ -63,12 +67,12 @@ public final class TrainNaiveBayesJob ex
addInputOption();
addOutputOption();
- addOption("labels", "l", "comma-separated list of labels to include in training", false);
+ addOption(LABELS, "l", "comma-separated list of labels to include in training", false);
- addOption(buildOption("extractLabels", "el", "Extract the labels from the input", false, false, ""));
- addOption("alphaI", "a", "smoothing parameter", String.valueOf(1.0f));
- addOption(buildOption("trainComplementary", "c", "train complementary?", false, false, String.valueOf(false)));
- addOption("labelIndex", "li", "The path to store the label index in", false);
+ addOption(buildOption(EXTRACT_LABELS, "el", "Extract the labels from the input", false, false, ""));
+ addOption(ALPHA_I, "a", "smoothing parameter", String.valueOf(1.0f));
+ addOption(buildOption(TRAIN_COMPLEMENTARY, "c", "train complementary?", false, false, String.valueOf(false)));
+ addOption(LABEL_INDEX, "li", "The path to store the label index in", false);
addOption(DefaultOptionCreator.overwriteOption().create());
Map<String, List<String>> parsedArgs = parseArguments(args);
if (parsedArgs == null) {
@@ -79,15 +83,15 @@ public final class TrainNaiveBayesJob ex
HadoopUtil.delete(getConf(), getTempPath());
}
Path labPath;
- String labPathStr = getOption("labelIndex");
+ String labPathStr = getOption(LABEL_INDEX);
if (labPathStr != null) {
labPath = new Path(labPathStr);
} else {
- labPath = getTempPath("labelIndex");
+ labPath = getTempPath(LABEL_INDEX);
}
long labelSize = createLabelIndex(labPath);
- float alphaI = Float.parseFloat(getOption("alphaI"));
- boolean trainComplementary = Boolean.parseBoolean(getOption("trainComplementary"));
+ float alphaI = Float.parseFloat(getOption(ALPHA_I));
+ boolean trainComplementary = Boolean.parseBoolean(getOption(TRAIN_COMPLEMENTARY));
HadoopUtil.setSerializations(getConf());
@@ -123,10 +127,11 @@ public final class TrainNaiveBayesJob ex
thetaSummer.setCombinerClass(VectorSumReducer.class);
thetaSummer.getConfiguration().setFloat(ThetaMapper.ALPHA_I, alphaI);
thetaSummer.getConfiguration().setBoolean(ThetaMapper.TRAIN_COMPLEMENTARY, trainComplementary);
+ /* TODO(robinanil): Enable this when thetanormalization works.
succeeded = thetaSummer.waitForCompletion(true);
if (!succeeded) {
return -1;
- }
+ }*/
//validate our model and then write it out to the official output
NaiveBayesModel naiveBayesModel = BayesUtils.readModelFromDir(getTempPath(), getConf());
@@ -138,10 +143,10 @@ public final class TrainNaiveBayesJob ex
private long createLabelIndex(Path labPath) throws IOException {
long labelSize = 0;
- if (hasOption("labels")) {
- Iterable<String> labels = Splitter.on(",").split(getOption("labels"));
+ if (hasOption(LABELS)) {
+ Iterable<String> labels = Splitter.on(",").split(getOption(LABELS));
labelSize = BayesUtils.writeLabelIndex(getConf(), labels, labPath);
- } else if (hasOption("extractLabels")) {
+ } else if (hasOption(EXTRACT_LABELS)) {
SequenceFileDirIterable<Text, IntWritable> iterable =
new SequenceFileDirIterable<Text, IntWritable>(getInputPath(), PathType.LIST, PathFilters.logsCRCFilter(), getConf());
labelSize = BayesUtils.writeLabelIndex(getConf(), labPath, iterable);
Modified: mahout/trunk/core/src/test/java/org/apache/mahout/classifier/naivebayes/NaiveBayesTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/classifier/naivebayes/NaiveBayesTest.java?rev=1345821&r1=1345820&r2=1345821&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/classifier/naivebayes/NaiveBayesTest.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/classifier/naivebayes/NaiveBayesTest.java Mon Jun 4 03:42:31 2012
@@ -42,8 +42,8 @@ public class NaiveBayesTest extends Maho
private File outputDir;
private File tempDir;
- static final Text LABEL_STOLEN = new Text("stolen");
- static final Text LABEL_NOT_STOLEN = new Text("not_stolen");
+ static final Text LABEL_STOLEN = new Text("/stolen/");
+ static final Text LABEL_NOT_STOLEN = new Text("/not_stolen/");
static final Vector.Element COLOR_RED = MathHelper.elem(0, 1);
static final Vector.Element COLOR_YELLOW = MathHelper.elem(1, 1);
@@ -69,16 +69,16 @@ public class NaiveBayesTest extends Maho
new Path(inputFile.getAbsolutePath()), Text.class, VectorWritable.class);
try {
- writer.append(LABEL_STOLEN, trainingInstance(COLOR_RED, TYPE_SPORTS, ORIGIN_DOMESTIC));
+ writer.append(LABEL_STOLEN, trainingInstance(COLOR_RED, TYPE_SPORTS, ORIGIN_DOMESTIC));
writer.append(LABEL_NOT_STOLEN, trainingInstance(COLOR_RED, TYPE_SPORTS, ORIGIN_DOMESTIC));
- writer.append(LABEL_STOLEN, trainingInstance(COLOR_RED, TYPE_SPORTS, ORIGIN_DOMESTIC));
+ writer.append(LABEL_STOLEN, trainingInstance(COLOR_RED, TYPE_SPORTS, ORIGIN_DOMESTIC));
writer.append(LABEL_NOT_STOLEN, trainingInstance(COLOR_YELLOW, TYPE_SPORTS, ORIGIN_DOMESTIC));
- writer.append(LABEL_STOLEN, trainingInstance(COLOR_YELLOW, TYPE_SPORTS, ORIGIN_IMPORTED));
+ writer.append(LABEL_STOLEN, trainingInstance(COLOR_YELLOW, TYPE_SPORTS, ORIGIN_IMPORTED));
writer.append(LABEL_NOT_STOLEN, trainingInstance(COLOR_YELLOW, TYPE_SUV, ORIGIN_IMPORTED));
- writer.append(LABEL_STOLEN, trainingInstance(COLOR_YELLOW, TYPE_SUV, ORIGIN_IMPORTED));
+ writer.append(LABEL_STOLEN, trainingInstance(COLOR_YELLOW, TYPE_SUV, ORIGIN_IMPORTED));
writer.append(LABEL_NOT_STOLEN, trainingInstance(COLOR_YELLOW, TYPE_SUV, ORIGIN_DOMESTIC));
writer.append(LABEL_NOT_STOLEN, trainingInstance(COLOR_RED, TYPE_SUV, ORIGIN_IMPORTED));
- writer.append(LABEL_STOLEN, trainingInstance(COLOR_RED, TYPE_SPORTS, ORIGIN_IMPORTED));
+ writer.append(LABEL_STOLEN, trainingInstance(COLOR_RED, TYPE_SPORTS, ORIGIN_IMPORTED));
} finally {
Closeables.closeQuietly(writer);
}
@@ -89,7 +89,7 @@ public class NaiveBayesTest extends Maho
TrainNaiveBayesJob trainNaiveBayes = new TrainNaiveBayesJob();
trainNaiveBayes.setConf(conf);
trainNaiveBayes.run(new String[] { "--input", inputFile.getAbsolutePath(), "--output", outputDir.getAbsolutePath(),
- "--labels", "stolen,not_stolen", "--tempDir", tempDir.getAbsolutePath() });
+ "-el", "--tempDir", tempDir.getAbsolutePath() });
NaiveBayesModel naiveBayesModel = NaiveBayesModel.materialize(new Path(outputDir.getAbsolutePath()), conf);
@@ -108,7 +108,7 @@ public class NaiveBayesTest extends Maho
TrainNaiveBayesJob trainNaiveBayes = new TrainNaiveBayesJob();
trainNaiveBayes.setConf(conf);
trainNaiveBayes.run(new String[] { "--input", inputFile.getAbsolutePath(), "--output", outputDir.getAbsolutePath(),
- "--labels", "stolen,not_stolen", "--trainComplementary",
+ "-el", "--trainComplementary",
"--tempDir", tempDir.getAbsolutePath() });
NaiveBayesModel naiveBayesModel = NaiveBayesModel.materialize(new Path(outputDir.getAbsolutePath()), conf);
Modified: mahout/trunk/core/src/test/java/org/apache/mahout/classifier/naivebayes/training/IndexInstancesMapperTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/classifier/naivebayes/training/IndexInstancesMapperTest.java?rev=1345821&r1=1345820&r2=1345821&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/classifier/naivebayes/training/IndexInstancesMapperTest.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/classifier/naivebayes/training/IndexInstancesMapperTest.java Mon Jun 4 03:42:31 2012
@@ -59,7 +59,7 @@ public class IndexInstancesMapperTest ex
IndexInstancesMapper indexInstances = new IndexInstancesMapper();
setField(indexInstances, "labelIndex", labelIndex);
- indexInstances.map(new Text("bird"), instance, ctx);
+ indexInstances.map(new Text("/bird/"), instance, ctx);
EasyMock.verify(ctx);
}
@@ -77,7 +77,7 @@ public class IndexInstancesMapperTest ex
IndexInstancesMapper indexInstances = new IndexInstancesMapper();
setField(indexInstances, "labelIndex", labelIndex);
- indexInstances.map(new Text("fish"), instance, ctx);
+ indexInstances.map(new Text("/fish/"), instance, ctx);
EasyMock.verify(ctx, skippedInstances);
}
Modified: mahout/trunk/examples/bin/classify-20newsgroups.sh
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/bin/classify-20newsgroups.sh?rev=1345821&r1=1345820&r2=1345821&view=diff
==============================================================================
--- mahout/trunk/examples/bin/classify-20newsgroups.sh (original)
+++ mahout/trunk/examples/bin/classify-20newsgroups.sh Mon Jun 4 03:42:31 2012
@@ -97,7 +97,7 @@ if [ "x$alg" == "xnaivebayes" -o "x$al
-i ${WORK_DIR}/20news-vectors/tfidf-vectors \
--trainingOutput ${WORK_DIR}/20news-train-vectors \
--testOutput ${WORK_DIR}/20news-test-vectors \
- --randomSelectionPct 20 --overwrite --sequenceFiles -xm sequential
+ --randomSelectionPct 40 --overwrite --sequenceFiles -xm sequential
echo "Training Naive Bayes model"
./bin/mahout trainnb \