You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by ro...@apache.org on 2012/06/03 20:35:00 UTC

svn commit: r1345735 - in /mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes: ./ test/ training/

Author: robinanil
Date: Sun Jun  3 18:34:59 2012
New Revision: 1345735

URL: http://svn.apache.org/viewvc?rev=1345735&view=rev
Log:
MAHOUT-1006: Make the end-to-end example work

Modified:
    mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/AbstractNaiveBayesClassifier.java
    mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/BayesUtils.java
    mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/ComplementaryNaiveBayesClassifier.java
    mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/test/BayesTestMapper.java
    mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/test/TestNaiveBayesDriver.java
    mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/training/IndexInstancesMapper.java
    mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/training/ThetaMapper.java
    mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/training/TrainNaiveBayesJob.java
    mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/training/WeightsMapper.java

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/AbstractNaiveBayesClassifier.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/AbstractNaiveBayesClassifier.java?rev=1345735&r1=1345734&r2=1345735&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/AbstractNaiveBayesClassifier.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/AbstractNaiveBayesClassifier.java Sun Jun  3 18:34:59 2012
@@ -55,9 +55,7 @@ public abstract class AbstractNaiveBayes
 
   @Override
   public Vector classifyFull(Vector instance) {
-    System.out.println(1);
     Vector score = model.createScoringVector();
-    System.out.println(score.size());
     for (int label = 0; label < model.numLabels(); label++) {
       score.set(label, getScoreForLabelInstance(label, instance));
     }

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/BayesUtils.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/BayesUtils.java?rev=1345735&r1=1345734&r2=1345735&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/BayesUtils.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/BayesUtils.java Sun Jun  3 18:34:59 2012
@@ -116,7 +116,7 @@ public final class BayesUtils {
     int i = 0;
     try {
       for (Object label : labels) {
-        String theLabel = ((Pair<?,?>) label).getFirst().toString();
+        String theLabel = ((Pair<?,?>) label).getFirst().toString().split("/")[1];
         if (!seen.contains(theLabel)) {
           writer.append(new Text(theLabel), new IntWritable(i++));
           seen.add(theLabel);

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/ComplementaryNaiveBayesClassifier.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/ComplementaryNaiveBayesClassifier.java?rev=1345735&r1=1345734&r2=1345735&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/ComplementaryNaiveBayesClassifier.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/ComplementaryNaiveBayesClassifier.java Sun Jun  3 18:34:59 2012
@@ -36,5 +36,4 @@ public class ComplementaryNaiveBayesClas
 
     return Math.log(numerator / denominator);
   }
-
 }

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/test/BayesTestMapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/test/BayesTestMapper.java?rev=1345735&r1=1345734&r2=1345735&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/test/BayesTestMapper.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/test/BayesTestMapper.java Sun Jun  3 18:34:59 2012
@@ -59,6 +59,6 @@ public class BayesTestMapper extends Map
   protected void map(Text key, VectorWritable value, Context context) throws IOException, InterruptedException {
     Vector result = classifier.classifyFull(value.get());
     //the key is the expected value
-    context.write(key, new VectorWritable(result));
+    context.write(new Text(key.toString().split("/")[1]), new VectorWritable(result));
   }
 }

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/test/TestNaiveBayesDriver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/test/TestNaiveBayesDriver.java?rev=1345735&r1=1345734&r2=1345735&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/test/TestNaiveBayesDriver.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/test/TestNaiveBayesDriver.java Sun Jun  3 18:34:59 2012
@@ -99,7 +99,8 @@ public class TestNaiveBayesDriver extend
       Text key = new Text();
       VectorWritable vw = new VectorWritable();
       while (reader.next(key, vw)) {
-        writer.append(key, new VectorWritable(classifier.classifyFull(vw.get())));
+        writer.append(new Text(key.toString().split("/")[1]),
+            new VectorWritable(classifier.classifyFull(vw.get())));
       }
       writer.close();
       reader.close();

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/training/IndexInstancesMapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/training/IndexInstancesMapper.java?rev=1345735&r1=1345734&r2=1345735&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/training/IndexInstancesMapper.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/training/IndexInstancesMapper.java Sun Jun  3 18:34:59 2012
@@ -40,7 +40,7 @@ public class IndexInstancesMapper extend
 
   @Override
   protected void map(Text labelText, VectorWritable instance, Context ctx) throws IOException, InterruptedException {
-    String label = labelText.toString();
+    String label = labelText.toString().split("/")[1]; 
     if (labelIndex.containsKey(label)) {
       ctx.write(new IntWritable(labelIndex.get(label)), instance);
     } else {

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/training/ThetaMapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/training/ThetaMapper.java?rev=1345735&r1=1345734&r2=1345735&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/training/ThetaMapper.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/training/ThetaMapper.java Sun Jun  3 18:34:59 2012
@@ -41,7 +41,7 @@ public class ThetaMapper extends Mapper<
     Configuration conf = ctx.getConfiguration();
 
     float alphaI = conf.getFloat(ALPHA_I, 1.0f);
-    Map<String,Vector> scores = BayesUtils.readScoresFromCache(conf);
+    Map<String, Vector> scores = BayesUtils.readScoresFromCache(conf);
 
     if (conf.getBoolean(TRAIN_COMPLEMENTARY, false)) {
       trainer = new ComplementaryThetaTrainer(scores.get(TrainNaiveBayesJob.WEIGHTS_PER_FEATURE),

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/training/TrainNaiveBayesJob.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/training/TrainNaiveBayesJob.java?rev=1345735&r1=1345734&r2=1345735&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/training/TrainNaiveBayesJob.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/training/TrainNaiveBayesJob.java Sun Jun  3 18:34:59 2012
@@ -112,8 +112,10 @@ public final class TrainNaiveBayesJob ex
     if (!succeeded) {
       return -1;
     }
+    
     //put the per label and per feature vectors into the cache
     HadoopUtil.cacheFiles(getTempPath(WEIGHTS), getConf());
+    
     //calculate the Thetas, write out to LABEL_THETA_NORMALIZER vectors -- TODO: add reference here to the part of the Rennie paper that discusses this
     Job thetaSummer = prepareJob(getTempPath(SUMMED_OBSERVATIONS), getTempPath(THETAS),
             SequenceFileInputFormat.class, ThetaMapper.class, Text.class, VectorWritable.class, VectorSumReducer.class,
@@ -125,6 +127,7 @@ public final class TrainNaiveBayesJob ex
     if (!succeeded) {
       return -1;
     }
+    
     //validate our model and then write it out to the official output
     NaiveBayesModel naiveBayesModel = BayesUtils.readModelFromDir(getTempPath(), getConf());
     naiveBayesModel.validate();

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/training/WeightsMapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/training/WeightsMapper.java?rev=1345735&r1=1345734&r2=1345735&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/training/WeightsMapper.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/training/WeightsMapper.java Sun Jun  3 18:34:59 2012
@@ -51,7 +51,6 @@ public class WeightsMapper extends Mappe
     }
 
     int label = index.get();
-//    instance.addTo(weightsPerFeature);
     weightsPerFeature.assign(instance, Functions.PLUS);
     weightsPerLabel.set(label, weightsPerLabel.get(label) + instance.zSum());
   }