You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by sr...@apache.org on 2010/05/07 22:21:49 UTC
svn commit: r942203 - in /lucene/mahout/trunk: examples/
examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/canopy/
examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/dirichlet/
examples/src/main/java/org/apache/maho...
Author: srowen
Date: Fri May 7 20:21:49 2010
New Revision: 942203
URL: http://svn.apache.org/viewvc?rev=942203&view=rev
Log:
More MAHOUT-302 madness; this time substantially fixed by letting examples and core depend on math
Modified:
lucene/mahout/trunk/examples/pom.xml
lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/canopy/Job.java
lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/dirichlet/Job.java
lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/kmeans/Job.java
lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/Job.java
lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/CDFitnessEvaluator.java
lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/CDGA.java
lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/FileInfoParser.java
lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/hadoop/CDMahoutEvaluator.java
lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/tool/CDInfosTool.java
lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/hadoop/CDMahoutEvaluatorTest.java
lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/tool/CDInfosToolTest.java
lucene/mahout/trunk/utils/pom.xml
Modified: lucene/mahout/trunk/examples/pom.xml
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/pom.xml?rev=942203&r1=942202&r2=942203&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/pom.xml (original)
+++ lucene/mahout/trunk/examples/pom.xml Fri May 7 20:21:49 2010
@@ -143,6 +143,18 @@
</dependency>
<dependency>
<groupId>org.apache.mahout</groupId>
+ <artifactId>mahout-math</artifactId>
+ <version>${project.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.mahout</groupId>
+ <artifactId>mahout-math</artifactId>
+ <version>${project.version}</version>
+ <type>test-jar</type>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.mahout</groupId>
<artifactId>mahout-utils</artifactId>
<version>${project.version}</version>
</dependency>
Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/canopy/Job.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/canopy/Job.java?rev=942203&r1=942202&r2=942203&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/canopy/Job.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/canopy/Job.java Fri May 7 20:21:49 2010
@@ -27,7 +27,6 @@ import org.apache.commons.cli2.builder.A
import org.apache.commons.cli2.builder.DefaultOptionBuilder;
import org.apache.commons.cli2.builder.GroupBuilder;
import org.apache.commons.cli2.commandline.Parser;
-import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
@@ -141,7 +140,8 @@ public final class Job {
"org.apache.mahout.math.RandomAccessSparseVector");
CanopyDriver.runJob(directoryContainingConvertedInput, output, measureClassName, t1, t2, true);
- ClusterDumper clusterDumper = new ClusterDumper("output/clusters-0", "output/clusteredPoints");
+ ClusterDumper clusterDumper =
+ new ClusterDumper(new Path(output, "clusters-0"), new Path(output, "clusteredPoints"));
clusterDumper.printClusters(null);
}
Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/dirichlet/Job.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/dirichlet/Job.java?rev=942203&r1=942202&r2=942203&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/dirichlet/Job.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/dirichlet/Job.java Fri May 7 20:21:49 2010
@@ -30,7 +30,6 @@ import org.apache.commons.cli2.builder.A
import org.apache.commons.cli2.builder.DefaultOptionBuilder;
import org.apache.commons.cli2.builder.GroupBuilder;
import org.apache.commons.cli2.commandline.Parser;
-import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.JobConf;
import org.apache.mahout.clustering.dirichlet.DirichletCluster;
@@ -130,13 +129,6 @@ public class Job {
* the alpha0 value for the DirichletDistribution
* @param numReducers
* the desired number of reducers
- * @throws IllegalAccessException
- * @throws InstantiationException
- * @throws ClassNotFoundException
- * @throws InvocationTargetException
- * @throws NoSuchMethodException
- * @throws IllegalArgumentException
- * @throws SecurityException
*/
public static void runJob(Path input,
Path output,
@@ -159,8 +151,9 @@ public class Job {
InputDriver.runJob(input, directoryContainingConvertedInput, vectorClassName);
DirichletDriver.runJob(directoryContainingConvertedInput, output, modelFactory,
vectorClassName, 60, numModels, maxIterations, alpha_0, numReducers, true, true, 0);
-
- ClusterDumper clusterDumper = new ClusterDumper("output/clusters-5", "output/clusteredPoints");
+
+ ClusterDumper clusterDumper =
+ new ClusterDumper(new Path(output, "clusters-5"), new Path(output, "clusteredPoints"));
clusterDumper.printClusters(null);
}
Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/kmeans/Job.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/kmeans/Job.java?rev=942203&r1=942202&r2=942203&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/kmeans/Job.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/kmeans/Job.java Fri May 7 20:21:49 2010
@@ -27,7 +27,6 @@ import org.apache.commons.cli2.builder.A
import org.apache.commons.cli2.builder.DefaultOptionBuilder;
import org.apache.commons.cli2.builder.GroupBuilder;
import org.apache.commons.cli2.commandline.Parser;
-import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
@@ -149,7 +148,8 @@ public final class Job {
KMeansDriver.runJob(directoryContainingConvertedInput, new Path(output, Cluster.INITIAL_CLUSTERS_DIR), output, measureClass,
convergenceDelta, maxIterations, 1, true);
- ClusterDumper clusterDumper = new ClusterDumper("output/clusters-10", "output/clusteredPoints");
+ ClusterDumper clusterDumper =
+ new ClusterDumper(new Path(output, "clusters-10"), new Path(output, "clusteredPoints"));
clusterDumper.printClusters(null);
}
}
Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/Job.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/Job.java?rev=942203&r1=942202&r2=942203&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/Job.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/Job.java Fri May 7 20:21:49 2010
@@ -133,7 +133,8 @@ public final class Job {
MeanShiftCanopyDriver.runJob(directoryContainingConvertedInput, output, measureClassName, t1, t2,
convergenceDelta, maxIterations, true, true);
- ClusterDumper clusterDumper = new ClusterDumper("output/clusters-10", "output/clusteredPoints");
+ ClusterDumper clusterDumper =
+ new ClusterDumper(new Path(output, "clusters-10"), new Path(output, "clusteredPoints"));
clusterDumper.printClusters(null);
}
Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/CDFitnessEvaluator.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/CDFitnessEvaluator.java?rev=942203&r1=942202&r2=942203&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/CDFitnessEvaluator.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/CDFitnessEvaluator.java Fri May 7 20:21:49 2010
@@ -60,9 +60,10 @@ public class CDFitnessEvaluator extends
protected void evaluate(List<? extends Rule> population,
List<Double> evaluations) {
evals.clear();
-
+
+ Path output = new Path("output");
try {
- CDMahoutEvaluator.evaluate(population, target, dataset, evals, split);
+ CDMahoutEvaluator.evaluate(population, target, dataset, output, evals, split);
} catch (IOException e) {
throw new IllegalStateException("Exception while evaluating the population", e);
}
Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/CDGA.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/CDGA.java?rev=942203&r1=942202&r2=942203&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/CDGA.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/CDGA.java Fri May 7 20:21:49 2010
@@ -200,13 +200,15 @@ public class CDGA {
// evolve the rules over the training set
Rule solution = engine.evolve(popSize, 1, new GenerationCount(genCount));
-
+
+ Path output = new Path("output");
+
// fitness over the training set
- CDFitness bestTrainFit = CDMahoutEvaluator.evaluate(solution, target, inpath, split);
+ CDFitness bestTrainFit = CDMahoutEvaluator.evaluate(solution, target, inpath, output, split);
// fitness over the testing set
split.setTraining(false);
- CDFitness bestTestFit = CDMahoutEvaluator.evaluate(solution, target, inpath, split);
+ CDFitness bestTestFit = CDMahoutEvaluator.evaluate(solution, target, inpath, output, split);
// evaluate the solution over the testing set
log.info("Best solution fitness (train set) : {}", bestTrainFit);
Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/FileInfoParser.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/FileInfoParser.java?rev=942203&r1=942202&r2=942203&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/FileInfoParser.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/FileInfoParser.java Fri May 7 20:21:49 2010
@@ -117,10 +117,10 @@ public class FileInfoParser {
throw new IllegalArgumentException("null inpath parameter");
}
if (!fs.exists(inpath)) {
- throw new IllegalArgumentException("Input path does not exist");
+ throw new IllegalArgumentException("Input path does not exist: " + inpath);
}
if (!fs.getFileStatus(inpath).isDir()) {
- throw new IllegalArgumentException("Input path should be a directory");
+ throw new IllegalArgumentException("Input path should be a directory: " + inpath);
}
// info file name
Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/hadoop/CDMahoutEvaluator.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/hadoop/CDMahoutEvaluator.java?rev=942203&r1=942202&r2=942203&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/hadoop/CDMahoutEvaluator.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/hadoop/CDMahoutEvaluator.java Fri May 7 20:21:49 2010
@@ -63,7 +63,7 @@ public class CDMahoutEvaluator {
* @throws IOException
*/
public static void evaluate(List<? extends Rule> rules, int target,
- Path inpath, List<CDFitness> evaluations, DatasetSplit split)
+ Path inpath, Path output, List<CDFitness> evaluations, DatasetSplit split)
throws IOException {
JobConf conf = new JobConf(CDMahoutEvaluator.class);
FileSystem fs = FileSystem.get(inpath.toUri(), conf);
@@ -72,13 +72,11 @@ public class CDMahoutEvaluator {
if (!fs.exists(inpath) || !fs.getFileStatus(inpath).isDir()) {
throw new IllegalArgumentException("Input path not found or is not a directory");
}
-
- Path outpath = new Path("output");
-
- configureJob(conf, rules, target, inpath, outpath, split);
+
+ configureJob(conf, rules, target, inpath, output, split);
JobClient.runJob(conf);
- importEvaluations(fs, conf, outpath, evaluations);
+ importEvaluations(fs, conf, output, evaluations);
}
/**
@@ -105,11 +103,11 @@ public class CDMahoutEvaluator {
* @return the evaluation
* @throws IOException
*/
- public static CDFitness evaluate(Rule rule, int target, Path inpath,
+ public static CDFitness evaluate(Rule rule, int target, Path inpath, Path output,
DatasetSplit split) throws IOException {
List<CDFitness> evals = new ArrayList<CDFitness>();
- evaluate(Arrays.asList(rule), target, inpath, evals, split);
+ evaluate(Arrays.asList(rule), target, inpath, output, evals, split);
return evals.get(0);
}
@@ -126,8 +124,8 @@ public class CDMahoutEvaluator {
* @throws IOException
*/
public static void evaluate(List<? extends Rule> rules, int target,
- Path inpath, List<CDFitness> evaluations) throws IOException {
- evaluate(rules, target, inpath, evaluations, new DatasetSplit(1));
+ Path inpath, Path output, List<CDFitness> evaluations) throws IOException {
+ evaluate(rules, target, inpath, output, evaluations, new DatasetSplit(1));
}
/**
Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/tool/CDInfosTool.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/tool/CDInfosTool.java?rev=942203&r1=942202&r2=942203&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/tool/CDInfosTool.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/tool/CDInfosTool.java Fri May 7 20:21:49 2010
@@ -75,7 +75,7 @@ public class CDInfosTool {
* generated descriptions for each non ignored attribute
* @throws IOException
*/
- public static void gatherInfos(Descriptors descriptors, Path inpath,
+ public static void gatherInfos(Descriptors descriptors, Path inpath, Path output,
List<String> descriptions) throws IOException {
JobConf conf = new JobConf(CDInfosTool.class);
FileSystem fs = FileSystem.get(inpath.toUri(), conf);
@@ -84,13 +84,11 @@ public class CDInfosTool {
if (!fs.exists(inpath) || !fs.getFileStatus(inpath).isDir()) {
throw new IllegalArgumentException("Input path not found or is not a directory");
}
-
- Path outpath = new Path("output");
-
- configureJob(conf, descriptors, inpath, outpath);
+
+ configureJob(conf, descriptors, inpath, output);
JobClient.runJob(conf);
- importDescriptions(fs, conf, outpath, descriptions);
+ importDescriptions(fs, conf, output, descriptions);
}
/**
@@ -240,20 +238,20 @@ public class CDInfosTool {
return;
}
- String input = cmdLine.getValue(inputOpt).toString();
+ Path input = new Path(cmdLine.getValue(inputOpt).toString());
+ Path output = new Path("output"); // TODO surely this should be configurable?
- Path inpath = new Path(input);
- FileSystem fs = FileSystem.get(inpath.toUri(), new Configuration());
+ FileSystem fs = FileSystem.get(input.toUri(), new Configuration());
log.info("Loading Descriptors...");
- Descriptors descriptors = loadDescriptors(fs, inpath);
+ Descriptors descriptors = loadDescriptors(fs, input);
log.info("Gathering informations...");
List<String> descriptions = new ArrayList<String>();
- gatherInfos(descriptors, inpath, descriptions);
+ gatherInfos(descriptors, input, output, descriptions);
log.info("Storing Descriptions...");
- storeDescriptions(fs, inpath, descriptors, descriptions);
+ storeDescriptions(fs, input, descriptors, descriptions);
} catch (OptionException e) {
log.error("Error while parsing options", e);
CommandLineUtil.printHelp(group);
Modified: lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/hadoop/CDMahoutEvaluatorTest.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/hadoop/CDMahoutEvaluatorTest.java?rev=942203&r1=942202&r2=942203&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/hadoop/CDMahoutEvaluatorTest.java (original)
+++ lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/hadoop/CDMahoutEvaluatorTest.java Fri May 7 20:21:49 2010
@@ -17,6 +17,8 @@
package org.apache.mahout.ga.watchmaker.cd.hadoop;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.mahout.common.MahoutTestCase;
import org.apache.mahout.ga.watchmaker.cd.CDFitness;
@@ -43,12 +45,16 @@ public class CDMahoutEvaluatorTest exten
}
// dataset
+ // This is sensitive to the working directory where the test is run:
Path input = new Path("target/test-classes/wdbc");
CDMahoutEvaluator.initializeDataSet(input);
// evaluate the rules
List<CDFitness> results = new ArrayList<CDFitness>();
- CDMahoutEvaluator.evaluate(rules, target, input, results);
+ Path output = getTestTempDirPath("output");
+ FileSystem fs = output.getFileSystem(new Configuration());
+ fs.delete(output, true); // It's unhappy if this directory exists
+ CDMahoutEvaluator.evaluate(rules, target, input, output, results);
// check the results
for (int index = 0; index < nbrules; index++) {
Modified: lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/tool/CDInfosToolTest.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/tool/CDInfosToolTest.java?rev=942203&r1=942202&r2=942203&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/tool/CDInfosToolTest.java (original)
+++ lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/tool/CDInfosToolTest.java Fri May 7 20:21:49 2010
@@ -210,6 +210,7 @@ public class CDInfosToolTest extends Mah
// random dataset
Path inpath = getTestTempDirPath("input");
+ Path output = getTestTempDirPath("output");
FileSystem fs = FileSystem.get(inpath.toUri(), new Configuration());
HadoopUtil.overwriteOutput(inpath);
@@ -217,7 +218,8 @@ public class CDInfosToolTest extends Mah
// Start the tool
List<String> result = new ArrayList<String>();
- CDInfosTool.gatherInfos(descriptors, inpath, result);
+ fs.delete(output, true); // It's unhappy if this directory exists
+ CDInfosTool.gatherInfos(descriptors, inpath, output, result);
// check the results
Collection<String> target = new ArrayList<String>();
Modified: lucene/mahout/trunk/utils/pom.xml
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/utils/pom.xml?rev=942203&r1=942202&r2=942203&view=diff
==============================================================================
--- lucene/mahout/trunk/utils/pom.xml (original)
+++ lucene/mahout/trunk/utils/pom.xml Fri May 7 20:21:49 2010
@@ -125,6 +125,18 @@
<type>test-jar</type>
<scope>test</scope>
</dependency>
+ <dependency>
+ <groupId>org.apache.mahout</groupId>
+ <artifactId>mahout-math</artifactId>
+ <version>${project.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.mahout</groupId>
+ <artifactId>mahout-math</artifactId>
+ <version>${project.version}</version>
+ <type>test-jar</type>
+ <scope>test</scope>
+ </dependency>
<dependency>
<groupId>junit</groupId>