You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by dw...@apache.org on 2008/03/14 15:28:55 UTC
svn commit: r637111 - in /lucene/mahout/trunk/src:
main/java/org/apache/mahout/clustering/canopy/
main/java/org/apache/mahout/clustering/kmeans/
test/java/org/apache/mahout/clustering/canopy/
test/java/org/apache/mahout/clustering/kmeans/
Author: dweiss
Date: Fri Mar 14 07:28:54 2008
New Revision: 637111
URL: http://svn.apache.org/viewvc?rev=637111&view=rev
Log:
MAHOUT-13: Class loader issues fixed by replacing Class.forName with loading through the thread's context class loader. This patch also removes the code that used to pass the JAR location as an argument. All tests pass.
Modified:
lucene/mahout/trunk/src/main/java/org/apache/mahout/clustering/canopy/Canopy.java
lucene/mahout/trunk/src/main/java/org/apache/mahout/clustering/canopy/CanopyClusteringJob.java
lucene/mahout/trunk/src/main/java/org/apache/mahout/clustering/canopy/CanopyDriver.java
lucene/mahout/trunk/src/main/java/org/apache/mahout/clustering/canopy/ClusterDriver.java
lucene/mahout/trunk/src/main/java/org/apache/mahout/clustering/kmeans/Cluster.java
lucene/mahout/trunk/src/main/java/org/apache/mahout/clustering/kmeans/KMeansDriver.java
lucene/mahout/trunk/src/test/java/org/apache/mahout/clustering/canopy/TestCanopyCreation.java
lucene/mahout/trunk/src/test/java/org/apache/mahout/clustering/kmeans/TestKmeansClustering.java
Modified: lucene/mahout/trunk/src/main/java/org/apache/mahout/clustering/canopy/Canopy.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/src/main/java/org/apache/mahout/clustering/canopy/Canopy.java?rev=637111&r1=637110&r2=637111&view=diff
==============================================================================
--- lucene/mahout/trunk/src/main/java/org/apache/mahout/clustering/canopy/Canopy.java (original)
+++ lucene/mahout/trunk/src/main/java/org/apache/mahout/clustering/canopy/Canopy.java Fri Mar 14 07:28:54 2008
@@ -100,7 +100,8 @@
*/
public static void configure(JobConf job) {
try {
- Class cl = Class.forName(job.get(DISTANCE_MEASURE_KEY));
+ final ClassLoader ccl = Thread.currentThread().getContextClassLoader();
+ Class cl = ccl.loadClass(job.get(DISTANCE_MEASURE_KEY));
measure = (DistanceMeasure) cl.newInstance();
measure.configure(job);
} catch (Exception e) {
Modified: lucene/mahout/trunk/src/main/java/org/apache/mahout/clustering/canopy/CanopyClusteringJob.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/src/main/java/org/apache/mahout/clustering/canopy/CanopyClusteringJob.java?rev=637111&r1=637110&r2=637111&view=diff
==============================================================================
--- lucene/mahout/trunk/src/main/java/org/apache/mahout/clustering/canopy/CanopyClusteringJob.java (original)
+++ lucene/mahout/trunk/src/main/java/org/apache/mahout/clustering/canopy/CanopyClusteringJob.java Fri Mar 14 07:28:54 2008
@@ -29,11 +29,7 @@
String measureClassName = args[2];
float t1 = new Float(args[3]);
float t2 = new Float(args[4]);
- String jarLocation = "apache-mahout-0.1-dev.jar";
- if (args.length > 5) {
- jarLocation = args[5];
- }
- runJob(input, output, measureClassName, t1, t2, jarLocation);
+ runJob(input, output, measureClassName, t1, t2);
}
/**
@@ -46,9 +42,9 @@
* @param t2 the T2 distance threshold
*/
public static void runJob(String input, String output,
- String measureClassName, float t1, float t2, String jarLocation) {
- CanopyDriver.runJob(input, output + "/canopies", measureClassName, t1, t2, jarLocation);
- ClusterDriver.runJob(input, output + "/canopies", output, measureClassName, t1, t2, jarLocation);
+ String measureClassName, float t1, float t2) {
+ CanopyDriver.runJob(input, output + "/canopies", measureClassName, t1, t2);
+ ClusterDriver.runJob(input, output + "/canopies", output, measureClassName, t1, t2);
}
}
Modified: lucene/mahout/trunk/src/main/java/org/apache/mahout/clustering/canopy/CanopyDriver.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/src/main/java/org/apache/mahout/clustering/canopy/CanopyDriver.java?rev=637111&r1=637110&r2=637111&view=diff
==============================================================================
--- lucene/mahout/trunk/src/main/java/org/apache/mahout/clustering/canopy/CanopyDriver.java (original)
+++ lucene/mahout/trunk/src/main/java/org/apache/mahout/clustering/canopy/CanopyDriver.java Fri Mar 14 07:28:54 2008
@@ -31,11 +31,7 @@
String measureClassName = args[2];
float t1 = new Float(args[3]);
float t2 = new Float(args[4]);
- String jarLocation = "apache-mahout-0.1-dev.jar";
- if (args.length > 5) {
- jarLocation = args[5];
- }
- runJob(input, output, measureClassName, t1, t2, jarLocation);
+ runJob(input, output, measureClassName, t1, t2);
}
/**
@@ -48,11 +44,10 @@
* @param t2 the T2 distance threshold
*/
public static void runJob(String input, String output,
- String measureClassName, float t1, float t2, String jarLocation) {
+ String measureClassName, float t1, float t2) {
JobClient client = new JobClient();
JobConf conf = new JobConf(
org.apache.mahout.clustering.canopy.CanopyDriver.class);
- conf.setJar(jarLocation);
conf.set(Canopy.DISTANCE_MEASURE_KEY, measureClassName);
conf.set(Canopy.T1_KEY, "" + t1);
conf.set(Canopy.T2_KEY, "" + t2);
Modified: lucene/mahout/trunk/src/main/java/org/apache/mahout/clustering/canopy/ClusterDriver.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/src/main/java/org/apache/mahout/clustering/canopy/ClusterDriver.java?rev=637111&r1=637110&r2=637111&view=diff
==============================================================================
--- lucene/mahout/trunk/src/main/java/org/apache/mahout/clustering/canopy/ClusterDriver.java (original)
+++ lucene/mahout/trunk/src/main/java/org/apache/mahout/clustering/canopy/ClusterDriver.java Fri Mar 14 07:28:54 2008
@@ -32,11 +32,7 @@
String measureClassName = args[3];
float t1 = new Float(args[4]);
float t2 = new Float(args[5]);
- String jarLocation = "apache-mahout-0.1-dev.jar";
- if (args.length > 6) {
- jarLocation = args[6];
- }
- runJob(points, canopies, output, measureClassName, t1, t2, jarLocation);
+ runJob(points, canopies, output, measureClassName, t1, t2);
}
/**
@@ -51,11 +47,11 @@
* @param jarLocation
*/
public static void runJob(String points, String canopies, String output,
- String measureClassName, float t1, float t2, String jarLocation) {
+ String measureClassName, float t1, float t2) {
JobClient client = new JobClient();
JobConf conf = new JobConf(
org.apache.mahout.clustering.canopy.ClusterDriver.class);
- conf.setJar(jarLocation);
+
conf.set(Canopy.DISTANCE_MEASURE_KEY, measureClassName);
conf.set(Canopy.T1_KEY, "" + t1);
conf.set(Canopy.T2_KEY, "" + t2);
Modified: lucene/mahout/trunk/src/main/java/org/apache/mahout/clustering/kmeans/Cluster.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/src/main/java/org/apache/mahout/clustering/kmeans/Cluster.java?rev=637111&r1=637110&r2=637111&view=diff
==============================================================================
--- lucene/mahout/trunk/src/main/java/org/apache/mahout/clustering/kmeans/Cluster.java (original)
+++ lucene/mahout/trunk/src/main/java/org/apache/mahout/clustering/kmeans/Cluster.java Fri Mar 14 07:28:54 2008
@@ -95,7 +95,8 @@
*/
public static void configure(JobConf job) {
try {
- Class cl = Class.forName(job.get(DISTANCE_MEASURE_KEY));
+ final ClassLoader ccl = Thread.currentThread().getContextClassLoader();
+ Class cl = ccl.loadClass(job.get(DISTANCE_MEASURE_KEY));
measure = (DistanceMeasure) cl.newInstance();
measure.configure(job);
convergenceDelta = new Float(job.get(CLUSTER_CONVERGENCE_KEY));
Modified: lucene/mahout/trunk/src/main/java/org/apache/mahout/clustering/kmeans/KMeansDriver.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/src/main/java/org/apache/mahout/clustering/kmeans/KMeansDriver.java?rev=637111&r1=637110&r2=637111&view=diff
==============================================================================
--- lucene/mahout/trunk/src/main/java/org/apache/mahout/clustering/kmeans/KMeansDriver.java (original)
+++ lucene/mahout/trunk/src/main/java/org/apache/mahout/clustering/kmeans/KMeansDriver.java Fri Mar 14 07:28:54 2008
@@ -35,12 +35,7 @@
String measureClass = args[3];
String convergenceDelta = args[4];
String maxIterations = args[5];
- String jarLocation = "dist/apache-mahout-0.1-dev.jar";
- if (args.length > 6) {
- jarLocation = args[6];
- }
- runJob(input, clusters, output, measureClass, convergenceDelta,
- maxIterations, jarLocation);
+ runJob(input, clusters, output, measureClass, convergenceDelta, maxIterations);
}
/**
@@ -55,7 +50,7 @@
* @param jarLocation The location of the Mahout jar
*/
public static void runJob(String input, String clustersIn, String output,
- String measureClass, String convergenceDelta, String maxIterations, String jarLocation) {
+ String measureClass, String convergenceDelta, String maxIterations) {
int maxIter = new Integer(maxIterations);
try {
// delete the output directory
@@ -75,7 +70,7 @@
// point the output to a new directory per iteration
String clustersOut = output + "/clusters-" + iteration;
converged = runIteration(input, clustersIn, clustersOut, measureClass,
- convergenceDelta, jarLocation);
+ convergenceDelta);
// now point the input to the old output directory
clustersIn = output + "/clusters-" + iteration;
iteration++;
@@ -83,7 +78,7 @@
// now actually cluster the points
System.out.println("Clustering ");
runClustering(input, clustersIn, output + "/points", measureClass,
- convergenceDelta, jarLocation);
+ convergenceDelta);
} catch (Exception e) {
throw new RuntimeException(e);
}
@@ -97,14 +92,12 @@
* @param clustersOut the directory pathname for output clusters
* @param measureClass the classname of the DistanceMeasure
* @param convergenceDelta the convergence delta value
- * @param jarLocation The location of the mahout jar
* @return true if the iteration successfully runs
*/
static boolean runIteration(String input, String clustersIn,
- String clustersOut, String measureClass, String convergenceDelta, String jarLocation) {
+ String clustersOut, String measureClass, String convergenceDelta) {
JobClient client = new JobClient();
JobConf conf = new JobConf(KMeansDriver.class);
- conf.setJar(jarLocation);
conf.setOutputKeyClass(Text.class);
conf.setOutputValueClass(Text.class);
@@ -141,13 +134,11 @@
* @param output the directory pathname for output points
* @param measureClass the classname of the DistanceMeasure
* @param convergenceDelta the convergence delta value
- * @param jarLocation The location of the mahout jar
*/
static void runClustering(String input, String clustersIn, String output,
- String measureClass, String convergenceDelta, String jarLocation) {
+ String measureClass, String convergenceDelta) {
JobClient client = new JobClient();
JobConf conf = new JobConf(KMeansDriver.class);
- conf.setJar(jarLocation);
conf.setOutputKeyClass(Text.class);
conf.setOutputValueClass(Text.class);
Modified: lucene/mahout/trunk/src/test/java/org/apache/mahout/clustering/canopy/TestCanopyCreation.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/src/test/java/org/apache/mahout/clustering/canopy/TestCanopyCreation.java?rev=637111&r1=637110&r2=637111&view=diff
==============================================================================
--- lucene/mahout/trunk/src/test/java/org/apache/mahout/clustering/canopy/TestCanopyCreation.java (original)
+++ lucene/mahout/trunk/src/test/java/org/apache/mahout/clustering/canopy/TestCanopyCreation.java Fri Mar 14 07:28:54 2008
@@ -466,8 +466,7 @@
writePointsToFile(points, "testdata/file2");
// now run the Canopy Driver
CanopyDriver.runJob("testdata", "output/canopies",
- ManhattanDistanceMeasure.class.getName(), (float) 3.1, (float) 2.1,
- "dist/apache-mahout-0.1-dev.jar");
+ ManhattanDistanceMeasure.class.getName(), (float) 3.1, (float) 2.1);
// verify output from sequence file
JobConf job = new JobConf(
@@ -502,8 +501,7 @@
writePointsToFile(points, "testdata/file2");
// now run the Canopy Driver
CanopyDriver.runJob("testdata", "output/canopies",
- EuclideanDistanceMeasure.class.getName(), (float) 3.1, (float) 2.1,
- "dist/apache-mahout-0.1-dev.jar");
+ EuclideanDistanceMeasure.class.getName(), (float) 3.1, (float) 2.1);
// verify output from sequence file
JobConf job = new JobConf(
@@ -676,8 +674,7 @@
writePointsToFile(points, "testdata/file2");
// now run the Job
CanopyClusteringJob.runJob("testdata", "output",
- ManhattanDistanceMeasure.class.getName(), (float) 3.1, (float) 2.1,
- "dist/apache-mahout-0.1-dev.jar");
+ ManhattanDistanceMeasure.class.getName(), (float) 3.1, (float) 2.1);
BufferedReader reader = new BufferedReader(new FileReader(
"output/clusters/part-00000"));
int count = 0;
@@ -705,8 +702,7 @@
writePointsToFile(points, "testdata/file2");
// now run the Job
CanopyClusteringJob.runJob("testdata", "output",
- EuclideanDistanceMeasure.class.getName(), (float) 3.1, (float) 2.1,
- "dist/apache-mahout-0.1-dev.jar");
+ EuclideanDistanceMeasure.class.getName(), (float) 3.1, (float) 2.1);
BufferedReader reader = new BufferedReader(new FileReader(
"output/clusters/part-00000"));
int count = 0;
@@ -736,8 +732,7 @@
writePointsToFileWithPayload(points, "testdata/file2", "file2");
// now run the Job
CanopyClusteringJob.runJob("testdata", "output",
- ManhattanDistanceMeasure.class.getName(), (float) 3.1, (float) 2.1,
- "dist/apache-mahout-0.1-dev.jar");
+ ManhattanDistanceMeasure.class.getName(), (float) 3.1, (float) 2.1);
BufferedReader reader = new BufferedReader(new FileReader(
"output/clusters/part-00000"));
int count = 0;
@@ -769,8 +764,7 @@
writePointsToFileWithPayload(points, "testdata/file2", "file2");
// now run the Job
CanopyClusteringJob.runJob("testdata", "output",
- EuclideanDistanceMeasure.class.getName(), (float) 3.1, (float) 2.1,
- "dist/apache-mahout-0.1-dev.jar");
+ EuclideanDistanceMeasure.class.getName(), (float) 3.1, (float) 2.1);
BufferedReader reader = new BufferedReader(new FileReader(
"output/clusters/part-00000"));
int count = 0;
@@ -801,8 +795,7 @@
// now run the Canopy Driver. User defined measure happens to be a Manhattan
// subclass so results are same.
CanopyDriver.runJob("testdata", "output/canopies",
- UserDefinedDistanceMeasure.class.getName(), (float) 3.1, (float) 2.1,
- "dist/apache-mahout-0.1-dev.jar");
+ UserDefinedDistanceMeasure.class.getName(), (float) 3.1, (float) 2.1);
// verify output from sequence file
JobConf job = new JobConf(
Modified: lucene/mahout/trunk/src/test/java/org/apache/mahout/clustering/kmeans/TestKmeansClustering.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/src/test/java/org/apache/mahout/clustering/kmeans/TestKmeansClustering.java?rev=637111&r1=637110&r2=637111&view=diff
==============================================================================
--- lucene/mahout/trunk/src/test/java/org/apache/mahout/clustering/kmeans/TestKmeansClustering.java (original)
+++ lucene/mahout/trunk/src/test/java/org/apache/mahout/clustering/kmeans/TestKmeansClustering.java Fri Mar 14 07:28:54 2008
@@ -372,9 +372,8 @@
writer.close();
// now run the Job
- String jarLocation = "dist/apache-mahout-0.1-dev.jar";
KMeansDriver.runJob("testdata/points", "testdata/clusters", "output",
- EuclideanDistanceMeasure.class.getName(), "0.001", "10", jarLocation);
+ EuclideanDistanceMeasure.class.getName(), "0.001", "10");
// now compare the expected clusters with actual
File outDir = new File("output/points");
@@ -422,13 +421,11 @@
// now run the Canopy job
CanopyDriver.runJob("testdata/points", "testdata/canopies",
- ManhattanDistanceMeasure.class.getName(), (float) 3.1, (float) 2.1,
- "dist/apache-mahout-0.1-dev.jar");
+ ManhattanDistanceMeasure.class.getName(), (float) 3.1, (float) 2.1);
// now run the KMeans job
- String jarLocation = "dist/apache-mahout-0.1-dev.jar";
KMeansDriver.runJob("testdata/points", "testdata/canopies", "output",
- EuclideanDistanceMeasure.class.getName(), "0.001", "10", jarLocation);
+ EuclideanDistanceMeasure.class.getName(), "0.001", "10");
// now compare the expected clusters with actual
File outDir = new File("output/points");