You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by ro...@apache.org on 2010/02/13 21:33:14 UTC
svn commit: r909910 - in
/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/ga/watchmaker:
EvalMapper.java MahoutEvaluator.java MahoutFitnessEvaluator.java
OutputUtils.java STEvolutionEngine.java STFitnessEvaluator.java
Author: robinanil
Date: Sat Feb 13 20:33:13 2010
New Revision: 909910
URL: http://svn.apache.org/viewvc?rev=909910&view=rev
Log:
MAHOUT-291
mahout-ga code style changes
Modified:
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/ga/watchmaker/EvalMapper.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/ga/watchmaker/MahoutEvaluator.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/ga/watchmaker/MahoutFitnessEvaluator.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/ga/watchmaker/OutputUtils.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/ga/watchmaker/STEvolutionEngine.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/ga/watchmaker/STFitnessEvaluator.java
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/ga/watchmaker/EvalMapper.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/ga/watchmaker/EvalMapper.java?rev=909910&r1=909909&r2=909910&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/ga/watchmaker/EvalMapper.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/ga/watchmaker/EvalMapper.java Sat Feb 13 20:33:13 2010
@@ -17,6 +17,8 @@
package org.apache.mahout.ga.watchmaker;
+import java.io.IOException;
+
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
@@ -28,43 +30,45 @@
import org.apache.mahout.common.StringUtils;
import org.uncommons.watchmaker.framework.FitnessEvaluator;
-import java.io.IOException;
-
/**
- * <p> Generic Mapper class for fitness evaluation. Works with the following : <code><key, candidate, key,
- * fitness></code>, where : </p> key: position of the current candidate in the input file. <br> candidate: candidate
- * solution to evaluate. <br> fitness: evaluated fitness for the given candidate.
+ * <p>
+ * Generic Mapper class for fitness evaluation. Works with the following : <code><key, candidate, key,
+ * fitness></code>, where :
+ * </p>
+ * key: position of the current candidate in the input file. <br>
+ * candidate: candidate solution to evaluate. <br>
+ * fitness: evaluated fitness for the given candidate.
*/
public class EvalMapper extends MapReduceBase implements
- Mapper<LongWritable, Text, LongWritable, DoubleWritable> {
-
+ Mapper<LongWritable,Text,LongWritable,DoubleWritable> {
+
/** Parameter used to store the "stringified" evaluator */
public static final String MAHOUT_GA_EVALUATOR = "mahout.ga.evaluator";
-
+
private FitnessEvaluator<Object> evaluator = null;
-
+
@Override
public void configure(JobConf job) {
- String evlstr = job.get(MAHOUT_GA_EVALUATOR);
+ String evlstr = job.get(EvalMapper.MAHOUT_GA_EVALUATOR);
if (evlstr == null) {
- throw new IllegalArgumentException(
- "'MAHOUT_GA_EVALUATOR' job parameter non found");
+ throw new IllegalArgumentException("'MAHOUT_GA_EVALUATOR' job parameter non found");
}
-
+
evaluator = StringUtils.fromString(evlstr);
-
+
super.configure(job);
}
-
+
@Override
- public void map(LongWritable key, Text value,
- OutputCollector<LongWritable, DoubleWritable> output, Reporter reporter)
- throws IOException {
+ public void map(LongWritable key,
+ Text value,
+ OutputCollector<LongWritable,DoubleWritable> output,
+ Reporter reporter) throws IOException {
Object candidate = StringUtils.fromString(value.toString());
-
+
double fitness = evaluator.getFitness(candidate, null);
-
+
output.collect(key, new DoubleWritable(fitness));
}
-
+
}
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/ga/watchmaker/MahoutEvaluator.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/ga/watchmaker/MahoutEvaluator.java?rev=909910&r1=909909&r2=909910&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/ga/watchmaker/MahoutEvaluator.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/ga/watchmaker/MahoutEvaluator.java Sat Feb 13 20:33:13 2010
@@ -17,6 +17,11 @@
package org.apache.mahout.ga.watchmaker;
+import java.io.BufferedWriter;
+import java.io.IOException;
+import java.io.OutputStreamWriter;
+import java.util.List;
+
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
@@ -31,104 +36,104 @@
import org.apache.mahout.common.StringUtils;
import org.uncommons.watchmaker.framework.FitnessEvaluator;
-import java.io.BufferedWriter;
-import java.io.IOException;
-import java.io.OutputStreamWriter;
-import java.util.List;
-
/**
- * Generic Mahout distributed evaluator. takes an evaluator and a population and launches a Hadoop job. The job
- * evaluates the fitness of each individual of the population using the given evaluator. Takes care of storing the
- * population into an input file, and loading the fitness from job outputs.
+ * Generic Mahout distributed evaluator. takes an evaluator and a population and launches a Hadoop job. The
+ * job evaluates the fitness of each individual of the population using the given evaluator. Takes care of
+ * storing the population into an input file, and loading the fitness from job outputs.
*/
public class MahoutEvaluator {
- private MahoutEvaluator() {
- }
-
+ private MahoutEvaluator() { }
+
/**
* Uses Mahout to evaluate every candidate from the input population using the given evaluator.
- *
- * @param evaluator FitnessEvaluator to use
- * @param population input population
- * @param evaluations <code>List<Double></code> that contains the evaluated fitness for each candidate from the
- * input population, sorted in the same order as the candidates.
+ *
+ * @param evaluator
+ * FitnessEvaluator to use
+ * @param population
+ * input population
+ * @param evaluations
+ * <code>List<Double></code> that contains the evaluated fitness for each candidate from the
+ * input population, sorted in the same order as the candidates.
*/
- public static void evaluate(FitnessEvaluator<?> evaluator, List<?> population,
- List<Double> evaluations) throws IOException {
+ public static void evaluate(FitnessEvaluator<?> evaluator, List<?> population, List<Double> evaluations) throws IOException {
JobConf conf = new JobConf(MahoutEvaluator.class);
FileSystem fs = FileSystem.get(conf);
- Path inpath = prepareInput(fs, population);
+ Path inpath = MahoutEvaluator.prepareInput(fs, population);
Path outpath = OutputUtils.prepareOutput(fs);
-
-
- configureJob(conf, evaluator, inpath, outpath);
+
+ MahoutEvaluator.configureJob(conf, evaluator, inpath, outpath);
JobClient.runJob(conf);
-
+
OutputUtils.importEvaluations(fs, conf, outpath, evaluations);
}
-
+
/**
* Create the input directory and stores the population in it.
- *
- * @param fs <code>FileSystem</code> to use
- * @param population population to store
+ *
+ * @param fs
+ * <code>FileSystem</code> to use
+ * @param population
+ * population to store
* @return input <code>Path</code>
*/
- private static Path prepareInput(FileSystem fs, List<?> population)
- throws IOException {
+ private static Path prepareInput(FileSystem fs, List<?> population) throws IOException {
Path inpath = new Path(fs.getWorkingDirectory(), "input");
-
+
// Delete the input if it already exists
if (fs.exists(inpath)) {
fs.delete(inpath, true);
}
-
+
fs.mkdirs(inpath);
-
- storePopulation(fs, new Path(inpath, "population"), population);
-
+
+ MahoutEvaluator.storePopulation(fs, new Path(inpath, "population"), population);
+
return inpath;
}
-
+
/**
* Configure the job
- *
- * @param evaluator FitnessEvaluator passed to the mapper
- * @param inpath input <code>Path</code>
- * @param outpath output <code>Path</code>
+ *
+ * @param evaluator
+ * FitnessEvaluator passed to the mapper
+ * @param inpath
+ * input <code>Path</code>
+ * @param outpath
+ * output <code>Path</code>
*/
- private static void configureJob(JobConf conf, FitnessEvaluator<?> evaluator,
- Path inpath, Path outpath) {
+ private static void configureJob(JobConf conf, FitnessEvaluator<?> evaluator, Path inpath, Path outpath) {
FileInputFormat.setInputPaths(conf, inpath);
FileOutputFormat.setOutputPath(conf, outpath);
-
+
conf.setOutputKeyClass(LongWritable.class);
conf.setOutputValueClass(DoubleWritable.class);
-
+
conf.setMapperClass(EvalMapper.class);
// no combiner
// identity reducer
// TODO do we really need a reducer at all ?
-
+
conf.setInputFormat(TextInputFormat.class);
conf.setOutputFormat(SequenceFileOutputFormat.class);
-
+
// store the stringified evaluator
conf.set(EvalMapper.MAHOUT_GA_EVALUATOR, StringUtils.toString(evaluator));
}
-
+
/**
* Stores a population of candidates in the output file path.
- *
- * @param fs FileSystem used to create the output file
- * @param f output file path
- * @param population population to store
+ *
+ * @param fs
+ * FileSystem used to create the output file
+ * @param f
+ * output file path
+ * @param population
+ * population to store
*/
- static void storePopulation(FileSystem fs, Path f, List<?> population)
- throws IOException {
+ static void storePopulation(FileSystem fs, Path f, List<?> population) throws IOException {
FSDataOutputStream out = fs.create(f);
BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(out));
-
+
try {
for (Object candidate : population) {
writer.write(StringUtils.toString(candidate));
@@ -138,5 +143,5 @@
writer.close();
}
}
-
+
}
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/ga/watchmaker/MahoutFitnessEvaluator.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/ga/watchmaker/MahoutFitnessEvaluator.java?rev=909910&r1=909909&r2=909910&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/ga/watchmaker/MahoutFitnessEvaluator.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/ga/watchmaker/MahoutFitnessEvaluator.java Sat Feb 13 20:33:13 2010
@@ -17,20 +17,20 @@
package org.apache.mahout.ga.watchmaker;
-import org.uncommons.watchmaker.framework.FitnessEvaluator;
-
import java.io.IOException;
import java.util.List;
+import org.uncommons.watchmaker.framework.FitnessEvaluator;
+
/** Watchmaker compatible Fitness Evaluator that delegates the evaluation of the whole population to Mahout. */
public class MahoutFitnessEvaluator<T> extends STFitnessEvaluator<T> {
-
+
private final FitnessEvaluator<? super T> evaluator;
-
+
public MahoutFitnessEvaluator(FitnessEvaluator<? super T> evaluator) {
this.evaluator = evaluator;
}
-
+
@Override
protected void evaluate(List<? extends T> population, List<Double> evaluations) {
try {
@@ -39,10 +39,10 @@
throw new IllegalStateException("Exception while evaluating the population", e);
}
}
-
+
@Override
public boolean isNatural() {
return evaluator.isNatural();
}
-
+
}
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/ga/watchmaker/OutputUtils.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/ga/watchmaker/OutputUtils.java?rev=909910&r1=909909&r2=909910&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/ga/watchmaker/OutputUtils.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/ga/watchmaker/OutputUtils.java Sat Feb 13 20:33:13 2010
@@ -17,6 +17,10 @@
package org.apache.mahout.ga.watchmaker;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
@@ -26,42 +30,40 @@
import org.apache.hadoop.io.SequenceFile.Sorter;
import org.apache.hadoop.mapred.JobConf;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.List;
-
/** Utility Class that deals with the output. */
public final class OutputUtils {
-
+
private OutputUtils() {
- // do nothing
+ // do nothing
}
-
+
/**
* Removes the output directory if it already exists.
- *
- * @param fs <code>FileSystem</code> to use
+ *
+ * @param fs
+ * <code>FileSystem</code> to use
* @return output <code>Path</code>
*/
public static Path prepareOutput(FileSystem fs) throws IOException {
Path outpath = new Path(fs.getWorkingDirectory(), "output");
-
+
if (fs.exists(outpath)) {
fs.delete(outpath, true);
}
-
+
return outpath;
}
-
+
/**
* Lists all files in the output <code>Path</code>
- *
- * @param fs <code>FileSystem</code> to use
- * @param outpath output <code>Path</code>
+ *
+ * @param fs
+ * <code>FileSystem</code> to use
+ * @param outpath
+ * output <code>Path</code>
* @return <code>Path</code> array
*/
- public static Path[] listOutputFiles(FileSystem fs, Path outpath)
- throws IOException {
+ public static Path[] listOutputFiles(FileSystem fs, Path outpath) throws IOException {
FileStatus[] status = fs.listStatus(outpath);
List<Path> outpaths = new ArrayList<Path>();
for (FileStatus s : status) {
@@ -69,28 +71,29 @@
outpaths.add(s.getPath());
}
}
-
+
Path[] outfiles = new Path[outpaths.size()];
outpaths.toArray(outfiles);
-
+
return outfiles;
}
-
+
/**
* Reads back the evaluations.
- *
- * @param outpath output <code>Path</code>
- * @param evaluations List of evaluations
+ *
+ * @param outpath
+ * output <code>Path</code>
+ * @param evaluations
+ * List of evaluations
*/
- public static void importEvaluations(FileSystem fs, JobConf conf,
- Path outpath, List<Double> evaluations) throws IOException {
+ public static void importEvaluations(FileSystem fs, JobConf conf, Path outpath, List<Double> evaluations) throws IOException {
Sorter sorter = new Sorter(fs, LongWritable.class, DoubleWritable.class, conf);
-
+
// merge and sort the outputs
- Path[] outfiles = listOutputFiles(fs, outpath);
+ Path[] outfiles = OutputUtils.listOutputFiles(fs, outpath);
Path output = new Path(outpath, "output.sorted");
sorter.merge(outfiles, output);
-
+
// import the evaluations
LongWritable key = new LongWritable();
DoubleWritable value = new DoubleWritable();
@@ -103,5 +106,5 @@
reader.close();
}
}
-
+
}
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/ga/watchmaker/STEvolutionEngine.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/ga/watchmaker/STEvolutionEngine.java?rev=909910&r1=909909&r2=909910&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/ga/watchmaker/STEvolutionEngine.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/ga/watchmaker/STEvolutionEngine.java Sat Feb 13 20:33:13 2010
@@ -17,6 +17,11 @@
package org.apache.mahout.ga.watchmaker;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Random;
+
import org.uncommons.watchmaker.framework.AbstractEvolutionEngine;
import org.uncommons.watchmaker.framework.CandidateFactory;
import org.uncommons.watchmaker.framework.EvaluatedCandidate;
@@ -24,35 +29,30 @@
import org.uncommons.watchmaker.framework.FitnessEvaluator;
import org.uncommons.watchmaker.framework.SelectionStrategy;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.List;
-import java.util.Random;
-
/** Single Threaded Evolution Engine. */
public class STEvolutionEngine<T> extends AbstractEvolutionEngine<T> {
-
+
public STEvolutionEngine(CandidateFactory<T> candidateFactory,
EvolutionaryOperator<T> evolutionScheme,
FitnessEvaluator<? super T> fitnessEvaluator,
- SelectionStrategy<? super T> selectionStrategy, Random rng) {
+ SelectionStrategy<? super T> selectionStrategy,
+ Random rng) {
super(candidateFactory, evolutionScheme, fitnessEvaluator, selectionStrategy, rng);
}
-
+
/** @see org.uncommons.watchmaker.framework.AbstractEvolutionEngine#evaluatePopulation(java.util.List) */
@Override
protected List<EvaluatedCandidate<T>> evaluatePopulation(List<T> population) {
List<Double> evaluations = new ArrayList<Double>();
STFitnessEvaluator<? super T> evaluator = (STFitnessEvaluator<? super T>) getFitnessEvaluator();
-
+
evaluator.evaluate(population, evaluations);
-
+
List<EvaluatedCandidate<T>> evaluatedPopulation = new ArrayList<EvaluatedCandidate<T>>();
for (int index = 0; index < population.size(); index++) {
- evaluatedPopulation.add(new EvaluatedCandidate<T>(population.get(index),
- evaluations.get(index)));
+ evaluatedPopulation.add(new EvaluatedCandidate<T>(population.get(index), evaluations.get(index)));
}
-
+
// Sort candidates in descending order according to fitness.
if (getFitnessEvaluator().isNatural()) // Descending values for natural fitness.
{
@@ -61,8 +61,8 @@
{
Collections.sort(evaluatedPopulation);
}
-
+
return evaluatedPopulation;
}
-
+
}
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/ga/watchmaker/STFitnessEvaluator.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/ga/watchmaker/STFitnessEvaluator.java?rev=909910&r1=909909&r2=909910&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/ga/watchmaker/STFitnessEvaluator.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/ga/watchmaker/STFitnessEvaluator.java Sat Feb 13 20:33:13 2010
@@ -17,18 +17,18 @@
package org.apache.mahout.ga.watchmaker;
-import org.uncommons.watchmaker.framework.FitnessEvaluator;
-
import java.util.ArrayList;
import java.util.List;
+import org.uncommons.watchmaker.framework.FitnessEvaluator;
+
/** Special Fitness Evaluator that evaluates all the population ones. */
public abstract class STFitnessEvaluator<T> implements FitnessEvaluator<T> {
-
+
private final List<Double> evaluations = new ArrayList<Double>();
-
+
private List<? extends T> population;
-
+
@Override
public double getFitness(T candidate, List<? extends T> population) {
// evaluate the population, when needed
@@ -37,15 +37,15 @@
evaluate(population, evaluations);
this.population = population;
}
-
+
int index = population.indexOf(candidate);
if (index == -1) {
throw new IllegalArgumentException("Candidate is not part of the population");
}
-
+
return evaluations.get(index);
}
-
+
protected abstract void evaluate(List<? extends T> population, List<Double> evaluations);
-
+
}