You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by ad...@apache.org on 2011/10/09 06:40:44 UTC

svn commit: r1180544 - in /mahout/trunk: core/src/main/java/org/apache/mahout/df/ core/src/main/java/org/apache/mahout/df/mapreduce/partial/ core/src/main/java/org/apache/mahout/ga/watchmaker/ examples/src/main/java/org/apache/mahout/df/

Author: adeneche
Date: Sun Oct  9 04:40:44 2011
New Revision: 1180544

URL: http://svn.apache.org/viewvc?rev=1180544&view=rev
Log:
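Fixed a small bug in DecisionForest Bagging.build(): out-of-bag predictions are now reported to the callback under the instance's id (Instance.getId()) instead of its index in the bagged data.
Related changes: Step1Mapper now assigns instance ids from a running counter, OutputUtils.listOutputFiles() skips files whose name starts with "_", and BreimanExample computes the oob error against the labels and size of the full data set rather than the train subset.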

Modified:
    mahout/trunk/core/src/main/java/org/apache/mahout/df/Bagging.java
    mahout/trunk/core/src/main/java/org/apache/mahout/df/mapreduce/partial/Step1Mapper.java
    mahout/trunk/core/src/main/java/org/apache/mahout/ga/watchmaker/OutputUtils.java
    mahout/trunk/examples/src/main/java/org/apache/mahout/df/BreimanExample.java

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/df/Bagging.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/df/Bagging.java?rev=1180544&r1=1180543&r2=1180544&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/df/Bagging.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/df/Bagging.java Sun Oct  9 04:40:44 2011
@@ -23,6 +23,7 @@ import java.util.Random;
 import org.apache.mahout.df.builder.TreeBuilder;
 import org.apache.mahout.df.callback.PredictionCallback;
 import org.apache.mahout.df.data.Data;
+import org.apache.mahout.df.data.Instance;
 import org.apache.mahout.df.node.Node;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -65,8 +66,9 @@ public class Bagging {
       log.debug("Oob error estimation");
       for (int index = 0; index < data.size(); index++) {
         if (!sampled[index]) {
-          int prediction = tree.classify(data.get(index));
-          callback.prediction(treeId, index, prediction);
+        	Instance instance = data.get(index);
+          int prediction = tree.classify(instance);
+          callback.prediction(treeId, instance.getId(), prediction);
         }
       }
     }

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/df/mapreduce/partial/Step1Mapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/df/mapreduce/partial/Step1Mapper.java?rev=1180544&r1=1180543&r2=1180544&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/df/mapreduce/partial/Step1Mapper.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/df/mapreduce/partial/Step1Mapper.java Sun Oct  9 04:40:44 2011
@@ -65,6 +65,9 @@ public class Step1Mapper extends MapredM
   /** will contain all instances if this mapper's split */
   private final List<Instance> instances = Lists.newArrayList();
   
+  /** current instance's id */
+  private int id;
+  
   public int getFirstTreeId() {
     return firstTreeId;
   }
@@ -140,7 +143,7 @@ public class Step1Mapper extends MapredM
   
   @Override
   protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
-    instances.add(converter.convert((int) key.get(), value.toString()));
+    instances.add(converter.convert(id++, value.toString()));
   }
   
   @Override

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/ga/watchmaker/OutputUtils.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/ga/watchmaker/OutputUtils.java?rev=1180544&r1=1180543&r2=1180544&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/ga/watchmaker/OutputUtils.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/ga/watchmaker/OutputUtils.java Sun Oct  9 04:40:44 2011
@@ -48,7 +48,7 @@ public final class OutputUtils {
   public static Path[] listOutputFiles(FileSystem fs, Path outpath) throws IOException {
     Collection<Path> outpaths = Lists.newArrayList();
     for (FileStatus s : fs.listStatus(outpath, PathFilters.logsCRCFilter())) {
-      if (!s.isDir()) {
+      if (!s.isDir() && !s.getPath().getName().startsWith("_")) {
         outpaths.add(s.getPath());
       }
     }

Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/df/BreimanExample.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/df/BreimanExample.java?rev=1180544&r1=1180543&r2=1180544&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/df/BreimanExample.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/df/BreimanExample.java Sun Oct  9 04:40:44 2011
@@ -99,7 +99,7 @@ public class BreimanExample extends Conf
     Data train = data.clone();
     Data test = train.rsplit(rng, (int) (data.size() * 0.1));
     
-    int[] trainLabels = train.extractLabels();
+    int[] labels = data.extractLabels();
     int[] testLabels = test.extractLabels();
     
     DefaultTreeBuilder treeBuilder = new DefaultTreeBuilder();
@@ -107,8 +107,8 @@ public class BreimanExample extends Conf
     SequentialBuilder forestBuilder = new SequentialBuilder(rng, treeBuilder, train);
     
     // grow a forest with m = log2(M)+1
-    ForestPredictions errorM = new ForestPredictions(train.size(), nblabels); // oob error when using m =
-                                                                              // log2(M)+1
+    ForestPredictions errorM = new ForestPredictions(data.size(), nblabels); // oob error when using m =
+                                                                             // log2(M)+1
     treeBuilder.setM(m);
     
     long time = System.currentTimeMillis();
@@ -117,11 +117,11 @@ public class BreimanExample extends Conf
     sumTimeM += System.currentTimeMillis() - time;
     numNodesM += forestM.nbNodes();
     
-    double oobM = ErrorEstimate.errorRate(trainLabels, errorM.computePredictions(rng)); // oob error estimate
-                                                                                        // when m = log2(M)+1
+    double oobM = ErrorEstimate.errorRate(labels, errorM.computePredictions(rng)); // oob error estimate
+                                                                                   // when m = log2(M)+1
     
     // grow a forest with m=1
-    ForestPredictions errorOne = new ForestPredictions(train.size(), nblabels); // oob error when using m = 1
+    ForestPredictions errorOne = new ForestPredictions(data.size(), nblabels); // oob error when using m = 1
     treeBuilder.setM(1);
     
     time = System.currentTimeMillis();
@@ -130,9 +130,9 @@ public class BreimanExample extends Conf
     sumTimeOne += System.currentTimeMillis() - time;
     numNodesOne += forestOne.nbNodes();
     
-    double oobOne = ErrorEstimate.errorRate(trainLabels, errorOne.computePredictions(rng)); // oob error
-                                                                                            // estimate when m
-                                                                                            // = 1
+    double oobOne = ErrorEstimate.errorRate(labels, errorOne.computePredictions(rng)); // oob error
+                                                                                       // estimate when m
+                                                                                       // = 1
     
     // compute the test set error (Selection Error), and mean tree error (One Tree Error),
     // using the lowest oob error forest