You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by ro...@apache.org on 2012/06/03 20:35:00 UTC
svn commit: r1345735 - in /mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes: ./ test/ training/
Author: robinanil
Date: Sun Jun 3 18:34:59 2012
New Revision: 1345735
URL: http://svn.apache.org/viewvc?rev=1345735&view=rev
Log:
MAHOUT-1006 making end-to-end example work
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/AbstractNaiveBayesClassifier.java
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/BayesUtils.java
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/ComplementaryNaiveBayesClassifier.java
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/test/BayesTestMapper.java
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/test/TestNaiveBayesDriver.java
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/training/IndexInstancesMapper.java
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/training/ThetaMapper.java
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/training/TrainNaiveBayesJob.java
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/training/WeightsMapper.java
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/AbstractNaiveBayesClassifier.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/AbstractNaiveBayesClassifier.java?rev=1345735&r1=1345734&r2=1345735&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/AbstractNaiveBayesClassifier.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/AbstractNaiveBayesClassifier.java Sun Jun 3 18:34:59 2012
@@ -55,9 +55,7 @@ public abstract class AbstractNaiveBayes
@Override
public Vector classifyFull(Vector instance) {
- System.out.println(1);
Vector score = model.createScoringVector();
- System.out.println(score.size());
for (int label = 0; label < model.numLabels(); label++) {
score.set(label, getScoreForLabelInstance(label, instance));
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/BayesUtils.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/BayesUtils.java?rev=1345735&r1=1345734&r2=1345735&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/BayesUtils.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/BayesUtils.java Sun Jun 3 18:34:59 2012
@@ -116,7 +116,7 @@ public final class BayesUtils {
int i = 0;
try {
for (Object label : labels) {
- String theLabel = ((Pair<?,?>) label).getFirst().toString();
+ String theLabel = ((Pair<?,?>) label).getFirst().toString().split("/")[1];
if (!seen.contains(theLabel)) {
writer.append(new Text(theLabel), new IntWritable(i++));
seen.add(theLabel);
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/ComplementaryNaiveBayesClassifier.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/ComplementaryNaiveBayesClassifier.java?rev=1345735&r1=1345734&r2=1345735&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/ComplementaryNaiveBayesClassifier.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/ComplementaryNaiveBayesClassifier.java Sun Jun 3 18:34:59 2012
@@ -36,5 +36,4 @@ public class ComplementaryNaiveBayesClas
return Math.log(numerator / denominator);
}
-
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/test/BayesTestMapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/test/BayesTestMapper.java?rev=1345735&r1=1345734&r2=1345735&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/test/BayesTestMapper.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/test/BayesTestMapper.java Sun Jun 3 18:34:59 2012
@@ -59,6 +59,6 @@ public class BayesTestMapper extends Map
protected void map(Text key, VectorWritable value, Context context) throws IOException, InterruptedException {
Vector result = classifier.classifyFull(value.get());
//the key is the expected value
- context.write(key, new VectorWritable(result));
+ context.write(new Text(key.toString().split("/")[1]), new VectorWritable(result));
}
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/test/TestNaiveBayesDriver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/test/TestNaiveBayesDriver.java?rev=1345735&r1=1345734&r2=1345735&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/test/TestNaiveBayesDriver.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/test/TestNaiveBayesDriver.java Sun Jun 3 18:34:59 2012
@@ -99,7 +99,8 @@ public class TestNaiveBayesDriver extend
Text key = new Text();
VectorWritable vw = new VectorWritable();
while (reader.next(key, vw)) {
- writer.append(key, new VectorWritable(classifier.classifyFull(vw.get())));
+ writer.append(new Text(key.toString().split("/")[1]),
+ new VectorWritable(classifier.classifyFull(vw.get())));
}
writer.close();
reader.close();
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/training/IndexInstancesMapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/training/IndexInstancesMapper.java?rev=1345735&r1=1345734&r2=1345735&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/training/IndexInstancesMapper.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/training/IndexInstancesMapper.java Sun Jun 3 18:34:59 2012
@@ -40,7 +40,7 @@ public class IndexInstancesMapper extend
@Override
protected void map(Text labelText, VectorWritable instance, Context ctx) throws IOException, InterruptedException {
- String label = labelText.toString();
+ String label = labelText.toString().split("/")[1];
if (labelIndex.containsKey(label)) {
ctx.write(new IntWritable(labelIndex.get(label)), instance);
} else {
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/training/ThetaMapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/training/ThetaMapper.java?rev=1345735&r1=1345734&r2=1345735&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/training/ThetaMapper.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/training/ThetaMapper.java Sun Jun 3 18:34:59 2012
@@ -41,7 +41,7 @@ public class ThetaMapper extends Mapper<
Configuration conf = ctx.getConfiguration();
float alphaI = conf.getFloat(ALPHA_I, 1.0f);
- Map<String,Vector> scores = BayesUtils.readScoresFromCache(conf);
+ Map<String, Vector> scores = BayesUtils.readScoresFromCache(conf);
if (conf.getBoolean(TRAIN_COMPLEMENTARY, false)) {
trainer = new ComplementaryThetaTrainer(scores.get(TrainNaiveBayesJob.WEIGHTS_PER_FEATURE),
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/training/TrainNaiveBayesJob.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/training/TrainNaiveBayesJob.java?rev=1345735&r1=1345734&r2=1345735&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/training/TrainNaiveBayesJob.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/training/TrainNaiveBayesJob.java Sun Jun 3 18:34:59 2012
@@ -112,8 +112,10 @@ public final class TrainNaiveBayesJob ex
if (!succeeded) {
return -1;
}
+
//put the per label and per feature vectors into the cache
HadoopUtil.cacheFiles(getTempPath(WEIGHTS), getConf());
+
//calculate the Thetas, write out to LABEL_THETA_NORMALIZER vectors -- TODO: add reference here to the part of the Rennie paper that discusses this
Job thetaSummer = prepareJob(getTempPath(SUMMED_OBSERVATIONS), getTempPath(THETAS),
SequenceFileInputFormat.class, ThetaMapper.class, Text.class, VectorWritable.class, VectorSumReducer.class,
@@ -125,6 +127,7 @@ public final class TrainNaiveBayesJob ex
if (!succeeded) {
return -1;
}
+
//validate our model and then write it out to the official output
NaiveBayesModel naiveBayesModel = BayesUtils.readModelFromDir(getTempPath(), getConf());
naiveBayesModel.validate();
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/training/WeightsMapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/training/WeightsMapper.java?rev=1345735&r1=1345734&r2=1345735&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/training/WeightsMapper.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/training/WeightsMapper.java Sun Jun 3 18:34:59 2012
@@ -51,7 +51,6 @@ public class WeightsMapper extends Mappe
}
int label = index.get();
-// instance.addTo(weightsPerFeature);
weightsPerFeature.assign(instance, Functions.PLUS);
weightsPerLabel.set(label, weightsPerLabel.get(label) + instance.zSum());
}