You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by dr...@apache.org on 2010/07/13 04:35:04 UTC

svn commit: r963589 - /mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/WikipediaDatasetCreatorDriver.java

Author: drew
Date: Tue Jul 13 02:35:03 2010
New Revision: 963589

URL: http://svn.apache.org/viewvc?rev=963589&view=rev
Log:
MAHOUT-167: Moved the setup of the 'wikipedia.categories' property ahead of Job creation so that the value is propagated to the job's tasks.

Modified:
    mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/WikipediaDatasetCreatorDriver.java

Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/WikipediaDatasetCreatorDriver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/WikipediaDatasetCreatorDriver.java?rev=963589&r1=963588&r2=963589&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/WikipediaDatasetCreatorDriver.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/WikipediaDatasetCreatorDriver.java Tue Jul 13 02:35:03 2010
@@ -164,6 +164,18 @@ public final class WikipediaDatasetCreat
     // Dont ever forget this. People should keep track of how hadoop conf
     // parameters can make or break a piece of code
     
+    Set<String> categories = new HashSet<String>();
+    for (String line : new FileLineIterable(new File(catFile))) {
+      categories.add(line.trim().toLowerCase(Locale.ENGLISH));
+    }
+    
+    DefaultStringifier<Set<String>> setStringifier =
+        new DefaultStringifier<Set<String>>(conf, GenericsUtil.getClass(categories));
+    
+    String categoriesStr = setStringifier.toString(categories);
+    
+    conf.set("wikipedia.categories", categoriesStr);
+    
     Job job = new Job(conf);
     if (log.isInfoEnabled()) {
       log.info("Input: {} Out: {} Categories: {}", new Object[] {input, output, catFile});
@@ -180,18 +192,6 @@ public final class WikipediaDatasetCreat
     Path outPath = new Path(output);
     FileOutputFormat.setOutputPath(job, outPath);
     HadoopUtil.overwriteOutput(outPath);
-
-    Set<String> categories = new HashSet<String>();
-    for (String line : new FileLineIterable(new File(catFile))) {
-      categories.add(line.trim().toLowerCase(Locale.ENGLISH));
-    }
-    
-    DefaultStringifier<Set<String>> setStringifier =
-        new DefaultStringifier<Set<String>>(conf, GenericsUtil.getClass(categories));
-    
-    String categoriesStr = setStringifier.toString(categories);
-    
-    conf.set("wikipedia.categories", categoriesStr);
     
     job.waitForCompletion(true);
   }