You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by dw...@apache.org on 2008/03/14 15:28:55 UTC

svn commit: r637111 - in /lucene/mahout/trunk/src: main/java/org/apache/mahout/clustering/canopy/ main/java/org/apache/mahout/clustering/kmeans/ test/java/org/apache/mahout/clustering/canopy/ test/java/org/apache/mahout/clustering/kmeans/

Author: dweiss
Date: Fri Mar 14 07:28:54 2008
New Revision: 637111

URL: http://svn.apache.org/viewvc?rev=637111&view=rev
Log:
MAHOUT-13: Fixed class loader issues by replacing Class.forName with the thread's context class loader. This patch also removes the code that used to pass the JAR location as an argument. All tests pass.

Modified:
    lucene/mahout/trunk/src/main/java/org/apache/mahout/clustering/canopy/Canopy.java
    lucene/mahout/trunk/src/main/java/org/apache/mahout/clustering/canopy/CanopyClusteringJob.java
    lucene/mahout/trunk/src/main/java/org/apache/mahout/clustering/canopy/CanopyDriver.java
    lucene/mahout/trunk/src/main/java/org/apache/mahout/clustering/canopy/ClusterDriver.java
    lucene/mahout/trunk/src/main/java/org/apache/mahout/clustering/kmeans/Cluster.java
    lucene/mahout/trunk/src/main/java/org/apache/mahout/clustering/kmeans/KMeansDriver.java
    lucene/mahout/trunk/src/test/java/org/apache/mahout/clustering/canopy/TestCanopyCreation.java
    lucene/mahout/trunk/src/test/java/org/apache/mahout/clustering/kmeans/TestKmeansClustering.java

Modified: lucene/mahout/trunk/src/main/java/org/apache/mahout/clustering/canopy/Canopy.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/src/main/java/org/apache/mahout/clustering/canopy/Canopy.java?rev=637111&r1=637110&r2=637111&view=diff
==============================================================================
--- lucene/mahout/trunk/src/main/java/org/apache/mahout/clustering/canopy/Canopy.java (original)
+++ lucene/mahout/trunk/src/main/java/org/apache/mahout/clustering/canopy/Canopy.java Fri Mar 14 07:28:54 2008
@@ -100,7 +100,8 @@
    */
   public static void configure(JobConf job) {
     try {
-      Class cl = Class.forName(job.get(DISTANCE_MEASURE_KEY));
+      final ClassLoader ccl = Thread.currentThread().getContextClassLoader();
+      Class cl = ccl.loadClass(job.get(DISTANCE_MEASURE_KEY));
       measure = (DistanceMeasure) cl.newInstance();
       measure.configure(job);
     } catch (Exception e) {

Modified: lucene/mahout/trunk/src/main/java/org/apache/mahout/clustering/canopy/CanopyClusteringJob.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/src/main/java/org/apache/mahout/clustering/canopy/CanopyClusteringJob.java?rev=637111&r1=637110&r2=637111&view=diff
==============================================================================
--- lucene/mahout/trunk/src/main/java/org/apache/mahout/clustering/canopy/CanopyClusteringJob.java (original)
+++ lucene/mahout/trunk/src/main/java/org/apache/mahout/clustering/canopy/CanopyClusteringJob.java Fri Mar 14 07:28:54 2008
@@ -29,11 +29,7 @@
     String measureClassName = args[2];
     float t1 = new Float(args[3]);
     float t2 = new Float(args[4]);
-    String jarLocation = "apache-mahout-0.1-dev.jar";
-    if (args.length > 5) {
-      jarLocation = args[5];
-    }
-    runJob(input, output, measureClassName, t1, t2, jarLocation);
+    runJob(input, output, measureClassName, t1, t2);
   }
 
   /**
@@ -46,9 +42,9 @@
    * @param t2               the T2 distance threshold
    */
   public static void runJob(String input, String output,
-                            String measureClassName, float t1, float t2, String jarLocation) {
-    CanopyDriver.runJob(input, output + "/canopies", measureClassName, t1, t2, jarLocation);
-    ClusterDriver.runJob(input, output + "/canopies", output, measureClassName, t1, t2, jarLocation);
+                            String measureClassName, float t1, float t2) {
+    CanopyDriver.runJob(input, output + "/canopies", measureClassName, t1, t2);
+    ClusterDriver.runJob(input, output + "/canopies", output, measureClassName, t1, t2);
   }
 
 }

Modified: lucene/mahout/trunk/src/main/java/org/apache/mahout/clustering/canopy/CanopyDriver.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/src/main/java/org/apache/mahout/clustering/canopy/CanopyDriver.java?rev=637111&r1=637110&r2=637111&view=diff
==============================================================================
--- lucene/mahout/trunk/src/main/java/org/apache/mahout/clustering/canopy/CanopyDriver.java (original)
+++ lucene/mahout/trunk/src/main/java/org/apache/mahout/clustering/canopy/CanopyDriver.java Fri Mar 14 07:28:54 2008
@@ -31,11 +31,7 @@
     String measureClassName = args[2];
     float t1 = new Float(args[3]);
     float t2 = new Float(args[4]);
-    String jarLocation = "apache-mahout-0.1-dev.jar";
-    if (args.length > 5) {
-      jarLocation = args[5];
-    }
-    runJob(input, output, measureClassName, t1, t2, jarLocation);
+    runJob(input, output, measureClassName, t1, t2);
   }
 
   /**
@@ -48,11 +44,10 @@
    * @param t2               the T2 distance threshold
    */
   public static void runJob(String input, String output,
-                            String measureClassName, float t1, float t2, String jarLocation) {
+                            String measureClassName, float t1, float t2) {
     JobClient client = new JobClient();
     JobConf conf = new JobConf(
             org.apache.mahout.clustering.canopy.CanopyDriver.class);
-    conf.setJar(jarLocation);
     conf.set(Canopy.DISTANCE_MEASURE_KEY, measureClassName);
     conf.set(Canopy.T1_KEY, "" + t1);
     conf.set(Canopy.T2_KEY, "" + t2);

Modified: lucene/mahout/trunk/src/main/java/org/apache/mahout/clustering/canopy/ClusterDriver.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/src/main/java/org/apache/mahout/clustering/canopy/ClusterDriver.java?rev=637111&r1=637110&r2=637111&view=diff
==============================================================================
--- lucene/mahout/trunk/src/main/java/org/apache/mahout/clustering/canopy/ClusterDriver.java (original)
+++ lucene/mahout/trunk/src/main/java/org/apache/mahout/clustering/canopy/ClusterDriver.java Fri Mar 14 07:28:54 2008
@@ -32,11 +32,7 @@
     String measureClassName = args[3];
     float t1 = new Float(args[4]);
     float t2 = new Float(args[5]);
-    String jarLocation = "apache-mahout-0.1-dev.jar";
-    if (args.length > 6) {
-      jarLocation = args[6];
-    }
-    runJob(points, canopies, output, measureClassName, t1, t2, jarLocation);
+    runJob(points, canopies, output, measureClassName, t1, t2);
   }
 
   /**
@@ -51,11 +47,11 @@
    * @param jarLocation
    */
   public static void runJob(String points, String canopies, String output,
-                            String measureClassName, float t1, float t2, String jarLocation) {
+                            String measureClassName, float t1, float t2) {
     JobClient client = new JobClient();
     JobConf conf = new JobConf(
             org.apache.mahout.clustering.canopy.ClusterDriver.class);
-    conf.setJar(jarLocation);
+
     conf.set(Canopy.DISTANCE_MEASURE_KEY, measureClassName);
     conf.set(Canopy.T1_KEY, "" + t1);
     conf.set(Canopy.T2_KEY, "" + t2);

Modified: lucene/mahout/trunk/src/main/java/org/apache/mahout/clustering/kmeans/Cluster.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/src/main/java/org/apache/mahout/clustering/kmeans/Cluster.java?rev=637111&r1=637110&r2=637111&view=diff
==============================================================================
--- lucene/mahout/trunk/src/main/java/org/apache/mahout/clustering/kmeans/Cluster.java (original)
+++ lucene/mahout/trunk/src/main/java/org/apache/mahout/clustering/kmeans/Cluster.java Fri Mar 14 07:28:54 2008
@@ -95,7 +95,8 @@
    */
   public static void configure(JobConf job) {
     try {
-      Class cl = Class.forName(job.get(DISTANCE_MEASURE_KEY));
+      final ClassLoader ccl = Thread.currentThread().getContextClassLoader();
+      Class cl = ccl.loadClass(job.get(DISTANCE_MEASURE_KEY));
       measure = (DistanceMeasure) cl.newInstance();
       measure.configure(job);
       convergenceDelta = new Float(job.get(CLUSTER_CONVERGENCE_KEY));

Modified: lucene/mahout/trunk/src/main/java/org/apache/mahout/clustering/kmeans/KMeansDriver.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/src/main/java/org/apache/mahout/clustering/kmeans/KMeansDriver.java?rev=637111&r1=637110&r2=637111&view=diff
==============================================================================
--- lucene/mahout/trunk/src/main/java/org/apache/mahout/clustering/kmeans/KMeansDriver.java (original)
+++ lucene/mahout/trunk/src/main/java/org/apache/mahout/clustering/kmeans/KMeansDriver.java Fri Mar 14 07:28:54 2008
@@ -35,12 +35,7 @@
     String measureClass = args[3];
     String convergenceDelta = args[4];
     String maxIterations = args[5];
-    String jarLocation = "dist/apache-mahout-0.1-dev.jar";
-    if (args.length > 6) {
-      jarLocation = args[6];
-    }
-    runJob(input, clusters, output, measureClass, convergenceDelta,
-            maxIterations, jarLocation);
+    runJob(input, clusters, output, measureClass, convergenceDelta, maxIterations);
   }
 
   /**
@@ -55,7 +50,7 @@
    * @param jarLocation      The location of the Mahout jar
    */
   public static void runJob(String input, String clustersIn, String output,
-                            String measureClass, String convergenceDelta, String maxIterations, String jarLocation) {
+                            String measureClass, String convergenceDelta, String maxIterations) {
     int maxIter = new Integer(maxIterations);
     try {
       // delete the output directory
@@ -75,7 +70,7 @@
         // point the output to a new directory per iteration
         String clustersOut = output + "/clusters-" + iteration;
         converged = runIteration(input, clustersIn, clustersOut, measureClass,
-                convergenceDelta, jarLocation);
+                convergenceDelta);
         // now point the input to the old output directory
         clustersIn = output + "/clusters-" + iteration;
         iteration++;
@@ -83,7 +78,7 @@
       // now actually cluster the points
       System.out.println("Clustering ");
       runClustering(input, clustersIn, output + "/points", measureClass,
-              convergenceDelta, jarLocation);
+              convergenceDelta);
     } catch (Exception e) {
       throw new RuntimeException(e);
     }
@@ -97,14 +92,12 @@
    * @param clustersOut      the directory pathname for output clusters
    * @param measureClass     the classname of the DistanceMeasure
    * @param convergenceDelta the convergence delta value
-   * @param jarLocation      The location of the mahout jar
    * @return true if the iteration successfully runs
    */
   static boolean runIteration(String input, String clustersIn,
-                              String clustersOut, String measureClass, String convergenceDelta, String jarLocation) {
+                              String clustersOut, String measureClass, String convergenceDelta) {
     JobClient client = new JobClient();
     JobConf conf = new JobConf(KMeansDriver.class);
-    conf.setJar(jarLocation);
 
     conf.setOutputKeyClass(Text.class);
     conf.setOutputValueClass(Text.class);
@@ -141,13 +134,11 @@
    * @param output           the directory pathname for output points
    * @param measureClass     the classname of the DistanceMeasure
    * @param convergenceDelta the convergence delta value
-   * @param jarLocation      The location of the mahout jar
    */
   static void runClustering(String input, String clustersIn, String output,
-                            String measureClass, String convergenceDelta, String jarLocation) {
+                            String measureClass, String convergenceDelta) {
     JobClient client = new JobClient();
     JobConf conf = new JobConf(KMeansDriver.class);
-    conf.setJar(jarLocation);
 
     conf.setOutputKeyClass(Text.class);
     conf.setOutputValueClass(Text.class);

Modified: lucene/mahout/trunk/src/test/java/org/apache/mahout/clustering/canopy/TestCanopyCreation.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/src/test/java/org/apache/mahout/clustering/canopy/TestCanopyCreation.java?rev=637111&r1=637110&r2=637111&view=diff
==============================================================================
--- lucene/mahout/trunk/src/test/java/org/apache/mahout/clustering/canopy/TestCanopyCreation.java (original)
+++ lucene/mahout/trunk/src/test/java/org/apache/mahout/clustering/canopy/TestCanopyCreation.java Fri Mar 14 07:28:54 2008
@@ -466,8 +466,7 @@
     writePointsToFile(points, "testdata/file2");
     // now run the Canopy Driver
     CanopyDriver.runJob("testdata", "output/canopies",
-            ManhattanDistanceMeasure.class.getName(), (float) 3.1, (float) 2.1,
-            "dist/apache-mahout-0.1-dev.jar");
+            ManhattanDistanceMeasure.class.getName(), (float) 3.1, (float) 2.1);
 
     // verify output from sequence file
     JobConf job = new JobConf(
@@ -502,8 +501,7 @@
     writePointsToFile(points, "testdata/file2");
     // now run the Canopy Driver
     CanopyDriver.runJob("testdata", "output/canopies",
-            EuclideanDistanceMeasure.class.getName(), (float) 3.1, (float) 2.1,
-            "dist/apache-mahout-0.1-dev.jar");
+            EuclideanDistanceMeasure.class.getName(), (float) 3.1, (float) 2.1);
 
     // verify output from sequence file
     JobConf job = new JobConf(
@@ -676,8 +674,7 @@
     writePointsToFile(points, "testdata/file2");
     // now run the Job
     CanopyClusteringJob.runJob("testdata", "output",
-            ManhattanDistanceMeasure.class.getName(), (float) 3.1, (float) 2.1,
-            "dist/apache-mahout-0.1-dev.jar");
+            ManhattanDistanceMeasure.class.getName(), (float) 3.1, (float) 2.1);
     BufferedReader reader = new BufferedReader(new FileReader(
             "output/clusters/part-00000"));
     int count = 0;
@@ -705,8 +702,7 @@
     writePointsToFile(points, "testdata/file2");
     // now run the Job
     CanopyClusteringJob.runJob("testdata", "output",
-            EuclideanDistanceMeasure.class.getName(), (float) 3.1, (float) 2.1,
-            "dist/apache-mahout-0.1-dev.jar");
+            EuclideanDistanceMeasure.class.getName(), (float) 3.1, (float) 2.1);
     BufferedReader reader = new BufferedReader(new FileReader(
             "output/clusters/part-00000"));
     int count = 0;
@@ -736,8 +732,7 @@
     writePointsToFileWithPayload(points, "testdata/file2", "file2");
     // now run the Job
     CanopyClusteringJob.runJob("testdata", "output",
-            ManhattanDistanceMeasure.class.getName(), (float) 3.1, (float) 2.1,
-            "dist/apache-mahout-0.1-dev.jar");
+            ManhattanDistanceMeasure.class.getName(), (float) 3.1, (float) 2.1);
     BufferedReader reader = new BufferedReader(new FileReader(
             "output/clusters/part-00000"));
     int count = 0;
@@ -769,8 +764,7 @@
     writePointsToFileWithPayload(points, "testdata/file2", "file2");
     // now run the Job
     CanopyClusteringJob.runJob("testdata", "output",
-            EuclideanDistanceMeasure.class.getName(), (float) 3.1, (float) 2.1,
-            "dist/apache-mahout-0.1-dev.jar");
+            EuclideanDistanceMeasure.class.getName(), (float) 3.1, (float) 2.1);
     BufferedReader reader = new BufferedReader(new FileReader(
             "output/clusters/part-00000"));
     int count = 0;
@@ -801,8 +795,7 @@
     // now run the Canopy Driver. User defined measure happens to be a Manhattan
     // subclass so results are same.
     CanopyDriver.runJob("testdata", "output/canopies",
-            UserDefinedDistanceMeasure.class.getName(), (float) 3.1, (float) 2.1,
-            "dist/apache-mahout-0.1-dev.jar");
+            UserDefinedDistanceMeasure.class.getName(), (float) 3.1, (float) 2.1);
 
     // verify output from sequence file
     JobConf job = new JobConf(

Modified: lucene/mahout/trunk/src/test/java/org/apache/mahout/clustering/kmeans/TestKmeansClustering.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/src/test/java/org/apache/mahout/clustering/kmeans/TestKmeansClustering.java?rev=637111&r1=637110&r2=637111&view=diff
==============================================================================
--- lucene/mahout/trunk/src/test/java/org/apache/mahout/clustering/kmeans/TestKmeansClustering.java (original)
+++ lucene/mahout/trunk/src/test/java/org/apache/mahout/clustering/kmeans/TestKmeansClustering.java Fri Mar 14 07:28:54 2008
@@ -372,9 +372,8 @@
       writer.close();
 
       // now run the Job
-      String jarLocation = "dist/apache-mahout-0.1-dev.jar";
       KMeansDriver.runJob("testdata/points", "testdata/clusters", "output",
-              EuclideanDistanceMeasure.class.getName(), "0.001", "10", jarLocation);
+              EuclideanDistanceMeasure.class.getName(), "0.001", "10");
 
       // now compare the expected clusters with actual
       File outDir = new File("output/points");
@@ -422,13 +421,11 @@
 
     // now run the Canopy job
     CanopyDriver.runJob("testdata/points", "testdata/canopies",
-            ManhattanDistanceMeasure.class.getName(), (float) 3.1, (float) 2.1,
-            "dist/apache-mahout-0.1-dev.jar");
+            ManhattanDistanceMeasure.class.getName(), (float) 3.1, (float) 2.1);
 
     // now run the KMeans job
-    String jarLocation = "dist/apache-mahout-0.1-dev.jar";
     KMeansDriver.runJob("testdata/points", "testdata/canopies", "output",
-            EuclideanDistanceMeasure.class.getName(), "0.001", "10", jarLocation);
+            EuclideanDistanceMeasure.class.getName(), "0.001", "10");
 
     // now compare the expected clusters with actual
     File outDir = new File("output/points");