You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by ap...@apache.org on 2015/04/05 01:00:19 UTC

mahout git commit: MAHOUT-1635: Getting an exception when I provide classification labels manually for Naive Bayes. closes apache/mahout#103

Repository: mahout
Updated Branches:
  refs/heads/master 1bcda3214 -> 88520fb1b


MAHOUT-1635: Getting an exception when I provide classification labels manually for Naive Bayes. closes apache/mahout#103


Project: http://git-wip-us.apache.org/repos/asf/mahout/repo
Commit: http://git-wip-us.apache.org/repos/asf/mahout/commit/88520fb1
Tree: http://git-wip-us.apache.org/repos/asf/mahout/tree/88520fb1
Diff: http://git-wip-us.apache.org/repos/asf/mahout/diff/88520fb1

Branch: refs/heads/master
Commit: 88520fb1b54953967f4d7e299cfd772f4357c768
Parents: 1bcda32
Author: Andrew Palumbo <ap...@apache.org>
Authored: Sat Apr 4 18:59:05 2015 -0400
Committer: Andrew Palumbo <ap...@apache.org>
Committed: Sat Apr 4 18:59:05 2015 -0400

----------------------------------------------------------------------
 CHANGELOG                                       |  2 ++
 examples/bin/classify-20newsgroups.sh           |  2 +-
 examples/bin/classify-wikipedia.sh              |  1 -
 .../naivebayes/training/TrainNaiveBayesJob.java | 23 ++++++--------------
 .../classifier/naivebayes/NaiveBayesTest.java   |  4 ++--
 5 files changed, 12 insertions(+), 20 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mahout/blob/88520fb1/CHANGELOG
----------------------------------------------------------------------
diff --git a/CHANGELOG b/CHANGELOG
index af9c81c..5be099b 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -2,6 +2,8 @@ Mahout Change Log
 
 Release 0.10.0 - unreleased
 
+  MAHOUT-1635: Getting an exception when I provide classification labels manually for Naive Bayes (apalumbo)
+
   MAHOUT-1662: Potential Path bug in SequenceFileVaultIterator breaks DisplaySpectralKMeans (Shannon Quinn)
 
   MAHOUT-1656: Change SNAPSHOT version from 1.0 to 0.10.0 (smarthi)

http://git-wip-us.apache.org/repos/asf/mahout/blob/88520fb1/examples/bin/classify-20newsgroups.sh
----------------------------------------------------------------------
diff --git a/examples/bin/classify-20newsgroups.sh b/examples/bin/classify-20newsgroups.sh
index 061487b..ea949e0 100755
--- a/examples/bin/classify-20newsgroups.sh
+++ b/examples/bin/classify-20newsgroups.sh
@@ -135,7 +135,7 @@ if  ( [ "x$alg" == "xnaivebayes-MapReduce" ] ||  [ "x$alg" == "xcnaivebayes-MapR
 
       echo "Training Naive Bayes model"
       ./bin/mahout trainnb \
-        -i ${WORK_DIR}/20news-train-vectors -el \
+        -i ${WORK_DIR}/20news-train-vectors \
         -o ${WORK_DIR}/model \
         -li ${WORK_DIR}/labelindex \
         -ow $c

http://git-wip-us.apache.org/repos/asf/mahout/blob/88520fb1/examples/bin/classify-wikipedia.sh
----------------------------------------------------------------------
diff --git a/examples/bin/classify-wikipedia.sh b/examples/bin/classify-wikipedia.sh
index 0bdb9a2..359ba70 100755
--- a/examples/bin/classify-wikipedia.sh
+++ b/examples/bin/classify-wikipedia.sh
@@ -156,7 +156,6 @@ if [ "x$alg" == "xCBayes" ] || [ "x$alg" == "xBinaryCBayes" ] ; then
 
   echo "Training Naive Bayes model"
   $MAHOUT_HOME/bin/mahout trainnb -i ${WORK_DIR}/training \
-                                  -el \
                                   -o ${WORK_DIR}/model \
                                   -li ${WORK_DIR}/labelindex \
                                   -ow \

http://git-wip-us.apache.org/repos/asf/mahout/blob/88520fb1/mr/src/main/java/org/apache/mahout/classifier/naivebayes/training/TrainNaiveBayesJob.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/classifier/naivebayes/training/TrainNaiveBayesJob.java b/mr/src/main/java/org/apache/mahout/classifier/naivebayes/training/TrainNaiveBayesJob.java
index ac1c4c9..5373436 100644
--- a/mr/src/main/java/org/apache/mahout/classifier/naivebayes/training/TrainNaiveBayesJob.java
+++ b/mr/src/main/java/org/apache/mahout/classifier/naivebayes/training/TrainNaiveBayesJob.java
@@ -48,12 +48,9 @@ public final class TrainNaiveBayesJob extends AbstractJob {
   private static final String TRAIN_COMPLEMENTARY = "trainComplementary";
   private static final String ALPHA_I = "alphaI";
   private static final String LABEL_INDEX = "labelIndex";
-  private static final String EXTRACT_LABELS = "extractLabels";
-  private static final String LABELS = "labels";
   public static final String WEIGHTS_PER_FEATURE = "__SPF";
   public static final String WEIGHTS_PER_LABEL = "__SPL";
   public static final String LABEL_THETA_NORMALIZER = "_LTN";
-
   public static final String SUMMED_OBSERVATIONS = "summedObservations";
   public static final String WEIGHTS = "weights";
   public static final String THETAS = "thetas";
@@ -67,13 +64,12 @@ public final class TrainNaiveBayesJob extends AbstractJob {
 
     addInputOption();
     addOutputOption();
-    addOption(LABELS, "l", "comma-separated list of labels to include in training", false);
 
-    addOption(buildOption(EXTRACT_LABELS, "el", "Extract the labels from the input", false, false, ""));
     addOption(ALPHA_I, "a", "smoothing parameter", String.valueOf(1.0f));
     addOption(buildOption(TRAIN_COMPLEMENTARY, "c", "train complementary?", false, false, String.valueOf(false)));
     addOption(LABEL_INDEX, "li", "The path to store the label index in", false);
     addOption(DefaultOptionCreator.overwriteOption().create());
+
     Map<String, List<String>> parsedArgs = parseArguments(args);
     if (parsedArgs == null) {
       return -1;
@@ -170,17 +166,12 @@ public final class TrainNaiveBayesJob extends AbstractJob {
 
   private long createLabelIndex(Path labPath) throws IOException {
     long labelSize = 0;
-    if (hasOption(LABELS)) {
-      Iterable<String> labels = Splitter.on(",").split(getOption(LABELS));
-      labelSize = BayesUtils.writeLabelIndex(getConf(), labels, labPath);
-    } else if (hasOption(EXTRACT_LABELS)) {
-      Iterable<Pair<Text,IntWritable>> iterable =
-          new SequenceFileDirIterable<Text, IntWritable>(getInputPath(),
-                                                         PathType.LIST,
-                                                         PathFilters.logsCRCFilter(),
-                                                         getConf());
-      labelSize = BayesUtils.writeLabelIndex(getConf(), labPath, iterable);
-    }
+    Iterable<Pair<Text,IntWritable>> iterable =
+      new SequenceFileDirIterable<Text, IntWritable>(getInputPath(),
+                                                     PathType.LIST,
+                                                     PathFilters.logsCRCFilter(),
+                                                     getConf());
+    labelSize = BayesUtils.writeLabelIndex(getConf(), labPath, iterable);
     return labelSize;
   }
 }

http://git-wip-us.apache.org/repos/asf/mahout/blob/88520fb1/mr/src/test/java/org/apache/mahout/classifier/naivebayes/NaiveBayesTest.java
----------------------------------------------------------------------
diff --git a/mr/src/test/java/org/apache/mahout/classifier/naivebayes/NaiveBayesTest.java b/mr/src/test/java/org/apache/mahout/classifier/naivebayes/NaiveBayesTest.java
index 974b90c..abd666e 100644
--- a/mr/src/test/java/org/apache/mahout/classifier/naivebayes/NaiveBayesTest.java
+++ b/mr/src/test/java/org/apache/mahout/classifier/naivebayes/NaiveBayesTest.java
@@ -89,7 +89,7 @@ public class NaiveBayesTest extends MahoutTestCase {
     TrainNaiveBayesJob trainNaiveBayes = new TrainNaiveBayesJob();
     trainNaiveBayes.setConf(conf);
     trainNaiveBayes.run(new String[] { "--input", inputFile.getAbsolutePath(), "--output", outputDir.getAbsolutePath(),
-        "-el", "--tempDir", tempDir.getAbsolutePath() });
+        "--tempDir", tempDir.getAbsolutePath() });
 
     NaiveBayesModel naiveBayesModel = NaiveBayesModel.materialize(new Path(outputDir.getAbsolutePath()), conf);
 
@@ -108,7 +108,7 @@ public class NaiveBayesTest extends MahoutTestCase {
     TrainNaiveBayesJob trainNaiveBayes = new TrainNaiveBayesJob();
     trainNaiveBayes.setConf(conf);
     trainNaiveBayes.run(new String[] { "--input", inputFile.getAbsolutePath(), "--output", outputDir.getAbsolutePath(),
-        "-el", "--trainComplementary",
+        "--trainComplementary",
         "--tempDir", tempDir.getAbsolutePath() });
 
     NaiveBayesModel naiveBayesModel = NaiveBayesModel.materialize(new Path(outputDir.getAbsolutePath()), conf);