You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by ap...@apache.org on 2015/04/05 01:00:19 UTC
mahout git commit: MAHOUT-1635: Getting an exception when I provide
classification labels manually for Naive Bayes. closes apache/mahout#103
Repository: mahout
Updated Branches:
refs/heads/master 1bcda3214 -> 88520fb1b
MAHOUT-1635: Getting an exception when I provide classification labels manually for Naive Bayes. closes apache/mahout#103
Project: http://git-wip-us.apache.org/repos/asf/mahout/repo
Commit: http://git-wip-us.apache.org/repos/asf/mahout/commit/88520fb1
Tree: http://git-wip-us.apache.org/repos/asf/mahout/tree/88520fb1
Diff: http://git-wip-us.apache.org/repos/asf/mahout/diff/88520fb1
Branch: refs/heads/master
Commit: 88520fb1b54953967f4d7e299cfd772f4357c768
Parents: 1bcda32
Author: Andrew Palumbo <ap...@apache.org>
Authored: Sat Apr 4 18:59:05 2015 -0400
Committer: Andrew Palumbo <ap...@apache.org>
Committed: Sat Apr 4 18:59:05 2015 -0400
----------------------------------------------------------------------
CHANGELOG | 2 ++
examples/bin/classify-20newsgroups.sh | 2 +-
examples/bin/classify-wikipedia.sh | 1 -
.../naivebayes/training/TrainNaiveBayesJob.java | 23 ++++++--------------
.../classifier/naivebayes/NaiveBayesTest.java | 4 ++--
5 files changed, 12 insertions(+), 20 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/mahout/blob/88520fb1/CHANGELOG
----------------------------------------------------------------------
diff --git a/CHANGELOG b/CHANGELOG
index af9c81c..5be099b 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -2,6 +2,8 @@ Mahout Change Log
Release 0.10.0 - unreleased
+ MAHOUT-1635: Getting an exception when I provide classification labels manually for Naive Bayes (apalumbo)
+
MAHOUT-1662: Potential Path bug in SequenceFileVaultIterator breaks DisplaySpectralKMeans (Shannon Quinn)
MAHOUT-1656: Change SNAPSHOT version from 1.0 to 0.10.0 (smarthi)
http://git-wip-us.apache.org/repos/asf/mahout/blob/88520fb1/examples/bin/classify-20newsgroups.sh
----------------------------------------------------------------------
diff --git a/examples/bin/classify-20newsgroups.sh b/examples/bin/classify-20newsgroups.sh
index 061487b..ea949e0 100755
--- a/examples/bin/classify-20newsgroups.sh
+++ b/examples/bin/classify-20newsgroups.sh
@@ -135,7 +135,7 @@ if ( [ "x$alg" == "xnaivebayes-MapReduce" ] || [ "x$alg" == "xcnaivebayes-MapR
echo "Training Naive Bayes model"
./bin/mahout trainnb \
- -i ${WORK_DIR}/20news-train-vectors -el \
+ -i ${WORK_DIR}/20news-train-vectors \
-o ${WORK_DIR}/model \
-li ${WORK_DIR}/labelindex \
-ow $c
http://git-wip-us.apache.org/repos/asf/mahout/blob/88520fb1/examples/bin/classify-wikipedia.sh
----------------------------------------------------------------------
diff --git a/examples/bin/classify-wikipedia.sh b/examples/bin/classify-wikipedia.sh
index 0bdb9a2..359ba70 100755
--- a/examples/bin/classify-wikipedia.sh
+++ b/examples/bin/classify-wikipedia.sh
@@ -156,7 +156,6 @@ if [ "x$alg" == "xCBayes" ] || [ "x$alg" == "xBinaryCBayes" ] ; then
echo "Training Naive Bayes model"
$MAHOUT_HOME/bin/mahout trainnb -i ${WORK_DIR}/training \
- -el \
-o ${WORK_DIR}/model \
-li ${WORK_DIR}/labelindex \
-ow \
http://git-wip-us.apache.org/repos/asf/mahout/blob/88520fb1/mr/src/main/java/org/apache/mahout/classifier/naivebayes/training/TrainNaiveBayesJob.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/classifier/naivebayes/training/TrainNaiveBayesJob.java b/mr/src/main/java/org/apache/mahout/classifier/naivebayes/training/TrainNaiveBayesJob.java
index ac1c4c9..5373436 100644
--- a/mr/src/main/java/org/apache/mahout/classifier/naivebayes/training/TrainNaiveBayesJob.java
+++ b/mr/src/main/java/org/apache/mahout/classifier/naivebayes/training/TrainNaiveBayesJob.java
@@ -48,12 +48,9 @@ public final class TrainNaiveBayesJob extends AbstractJob {
private static final String TRAIN_COMPLEMENTARY = "trainComplementary";
private static final String ALPHA_I = "alphaI";
private static final String LABEL_INDEX = "labelIndex";
- private static final String EXTRACT_LABELS = "extractLabels";
- private static final String LABELS = "labels";
public static final String WEIGHTS_PER_FEATURE = "__SPF";
public static final String WEIGHTS_PER_LABEL = "__SPL";
public static final String LABEL_THETA_NORMALIZER = "_LTN";
-
public static final String SUMMED_OBSERVATIONS = "summedObservations";
public static final String WEIGHTS = "weights";
public static final String THETAS = "thetas";
@@ -67,13 +64,12 @@ public final class TrainNaiveBayesJob extends AbstractJob {
addInputOption();
addOutputOption();
- addOption(LABELS, "l", "comma-separated list of labels to include in training", false);
- addOption(buildOption(EXTRACT_LABELS, "el", "Extract the labels from the input", false, false, ""));
addOption(ALPHA_I, "a", "smoothing parameter", String.valueOf(1.0f));
addOption(buildOption(TRAIN_COMPLEMENTARY, "c", "train complementary?", false, false, String.valueOf(false)));
addOption(LABEL_INDEX, "li", "The path to store the label index in", false);
addOption(DefaultOptionCreator.overwriteOption().create());
+
Map<String, List<String>> parsedArgs = parseArguments(args);
if (parsedArgs == null) {
return -1;
@@ -170,17 +166,12 @@ public final class TrainNaiveBayesJob extends AbstractJob {
private long createLabelIndex(Path labPath) throws IOException {
long labelSize = 0;
- if (hasOption(LABELS)) {
- Iterable<String> labels = Splitter.on(",").split(getOption(LABELS));
- labelSize = BayesUtils.writeLabelIndex(getConf(), labels, labPath);
- } else if (hasOption(EXTRACT_LABELS)) {
- Iterable<Pair<Text,IntWritable>> iterable =
- new SequenceFileDirIterable<Text, IntWritable>(getInputPath(),
- PathType.LIST,
- PathFilters.logsCRCFilter(),
- getConf());
- labelSize = BayesUtils.writeLabelIndex(getConf(), labPath, iterable);
- }
+ Iterable<Pair<Text,IntWritable>> iterable =
+ new SequenceFileDirIterable<Text, IntWritable>(getInputPath(),
+ PathType.LIST,
+ PathFilters.logsCRCFilter(),
+ getConf());
+ labelSize = BayesUtils.writeLabelIndex(getConf(), labPath, iterable);
return labelSize;
}
}
http://git-wip-us.apache.org/repos/asf/mahout/blob/88520fb1/mr/src/test/java/org/apache/mahout/classifier/naivebayes/NaiveBayesTest.java
----------------------------------------------------------------------
diff --git a/mr/src/test/java/org/apache/mahout/classifier/naivebayes/NaiveBayesTest.java b/mr/src/test/java/org/apache/mahout/classifier/naivebayes/NaiveBayesTest.java
index 974b90c..abd666e 100644
--- a/mr/src/test/java/org/apache/mahout/classifier/naivebayes/NaiveBayesTest.java
+++ b/mr/src/test/java/org/apache/mahout/classifier/naivebayes/NaiveBayesTest.java
@@ -89,7 +89,7 @@ public class NaiveBayesTest extends MahoutTestCase {
TrainNaiveBayesJob trainNaiveBayes = new TrainNaiveBayesJob();
trainNaiveBayes.setConf(conf);
trainNaiveBayes.run(new String[] { "--input", inputFile.getAbsolutePath(), "--output", outputDir.getAbsolutePath(),
- "-el", "--tempDir", tempDir.getAbsolutePath() });
+ "--tempDir", tempDir.getAbsolutePath() });
NaiveBayesModel naiveBayesModel = NaiveBayesModel.materialize(new Path(outputDir.getAbsolutePath()), conf);
@@ -108,7 +108,7 @@ public class NaiveBayesTest extends MahoutTestCase {
TrainNaiveBayesJob trainNaiveBayes = new TrainNaiveBayesJob();
trainNaiveBayes.setConf(conf);
trainNaiveBayes.run(new String[] { "--input", inputFile.getAbsolutePath(), "--output", outputDir.getAbsolutePath(),
- "-el", "--trainComplementary",
+ "--trainComplementary",
"--tempDir", tempDir.getAbsolutePath() });
NaiveBayesModel naiveBayesModel = NaiveBayesModel.materialize(new Path(outputDir.getAbsolutePath()), conf);