You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by sm...@apache.org on 2014/02/16 06:58:16 UTC
svn commit: r1568726 - in /mahout/trunk: CHANGELOG
core/src/main/java/org/apache/mahout/classifier/df/mapreduce/partial/PartialBuilder.java
core/src/main/java/org/apache/mahout/classifier/df/mapreduce/partial/Step1Mapper.java
Author: smarthi
Date: Sun Feb 16 05:58:15 2014
New Revision: 1568726
URL: http://svn.apache.org/r1568726
Log:
MAHOUT-1417: Random decision forest implementation fails in Hadoop 2
Modified:
mahout/trunk/CHANGELOG
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/df/mapreduce/partial/PartialBuilder.java
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/df/mapreduce/partial/Step1Mapper.java
Modified: mahout/trunk/CHANGELOG
URL: http://svn.apache.org/viewvc/mahout/trunk/CHANGELOG?rev=1568726&r1=1568725&r2=1568726&view=diff
==============================================================================
--- mahout/trunk/CHANGELOG (original)
+++ mahout/trunk/CHANGELOG Sun Feb 16 05:58:15 2014
@@ -2,6 +2,8 @@ Mahout Change Log
Release 1.0 - unreleased
+ MAHOUT-1417: Random decision forest implementation fails in Hadoop 2 (srowen)
+
MAHOUT-1416: Make access of DecisionForest.read(dataInput) less restricted (Manoj Awasthi via smarthi)
MAHOUT-1415: Clone method on sparse matrices fails if there is an empty row which has not been set explicitly (till.rohrmann via ssc)
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/df/mapreduce/partial/PartialBuilder.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/df/mapreduce/partial/PartialBuilder.java?rev=1568726&r1=1568725&r2=1568726&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/df/mapreduce/partial/PartialBuilder.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/df/mapreduce/partial/PartialBuilder.java Sun Feb 16 05:58:15 2014
@@ -35,15 +35,20 @@ import org.apache.mahout.classifier.df.m
import org.apache.mahout.classifier.df.node.Node;
import org.apache.mahout.common.Pair;
import org.apache.mahout.common.iterator.sequencefile.SequenceFileIterable;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.util.Arrays;
+import java.util.List;
/**
* Builds a random forest using partial data. Each mapper uses only the data given by its InputSplit
*/
public class PartialBuilder extends Builder {
+ private static final Logger log = LoggerFactory.getLogger(PartialBuilder.class);
+
public PartialBuilder(TreeBuilder treeBuilder, Path dataPath, Path datasetPath, Long seed) {
this(treeBuilder, dataPath, datasetPath, seed, new Configuration());
}
@@ -73,6 +78,18 @@ public class PartialBuilder extends Buil
job.setInputFormatClass(TextInputFormat.class);
job.setOutputFormatClass(SequenceFileOutputFormat.class);
+
+ // For this implementation to work, mapred.map.tasks needs to be set to the actual
+ // number of mappers Hadoop will use:
+ TextInputFormat inputFormat = new TextInputFormat();
+ List<?> splits = inputFormat.getSplits(job);
+ if (splits == null || splits.isEmpty()) {
+ log.warn("Unable to compute number of splits?");
+ } else {
+ int numSplits = splits.size();
+ log.info("Setting mapred.map.tasks = {}", numSplits);
+ conf.setInt("mapred.map.tasks", numSplits);
+ }
}
@Override
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/df/mapreduce/partial/Step1Mapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/df/mapreduce/partial/Step1Mapper.java?rev=1568726&r1=1568725&r2=1568726&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/df/mapreduce/partial/Step1Mapper.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/df/mapreduce/partial/Step1Mapper.java Sun Feb 16 05:58:15 2014
@@ -116,8 +116,8 @@ public class Step1Mapper extends MapredM
}
/**
- * Compute the number of trees for a given partition. The first partition (0) may be longer than the rest of
- * partition because of the remainder.
+ * Compute the number of trees for a given partition. The first partitions may be longer
+ * than the rest because of the remainder.
*
* @param numMaps
* total number of maps (partitions)
@@ -127,12 +127,9 @@ public class Step1Mapper extends MapredM
* partition to compute the number of trees for
*/
public static int nbTrees(int numMaps, int numTrees, int partition) {
- int nbTrees = numTrees / numMaps;
- if (partition == 0) {
- nbTrees += numTrees - nbTrees * numMaps;
- }
-
- return nbTrees;
+ int treesPerMapper = numTrees / numMaps;
+ int remainder = numTrees - numMaps * treesPerMapper;
+ return treesPerMapper + (partition < remainder ? 1 : 0);
}
@Override
@@ -162,6 +159,8 @@ public class Step1Mapper extends MapredM
MapredOutput emOut = new MapredOutput(tree);
context.write(key, emOut);
}
+
+ context.progress();
}
}