You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by dr...@apache.org on 2010/07/13 04:35:04 UTC
svn commit: r963589 -
/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/WikipediaDatasetCreatorDriver.java
Author: drew
Date: Tue Jul 13 02:35:03 2010
New Revision: 963589
URL: http://svn.apache.org/viewvc?rev=963589&view=rev
Log:
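MAHOUT-167: Moved setup of the 'wikipedia.categories' property to before Job creation so that it is propagated to the job's tasks (the Job takes its own copy of the Configuration when it is constructed, so properties set on conf afterwards are not seen by the job).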
Modified:
mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/WikipediaDatasetCreatorDriver.java
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/WikipediaDatasetCreatorDriver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/WikipediaDatasetCreatorDriver.java?rev=963589&r1=963588&r2=963589&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/WikipediaDatasetCreatorDriver.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/WikipediaDatasetCreatorDriver.java Tue Jul 13 02:35:03 2010
@@ -164,6 +164,18 @@ public final class WikipediaDatasetCreat
// Don't ever forget this. People should keep track of how Hadoop conf
// parameters can make or break a piece of code.
+ Set<String> categories = new HashSet<String>();
+ for (String line : new FileLineIterable(new File(catFile))) {
+ categories.add(line.trim().toLowerCase(Locale.ENGLISH));
+ }
+
+ DefaultStringifier<Set<String>> setStringifier =
+ new DefaultStringifier<Set<String>>(conf, GenericsUtil.getClass(categories));
+
+ String categoriesStr = setStringifier.toString(categories);
+
+ conf.set("wikipedia.categories", categoriesStr);
+
Job job = new Job(conf);
if (log.isInfoEnabled()) {
log.info("Input: {} Out: {} Categories: {}", new Object[] {input, output, catFile});
@@ -180,18 +192,6 @@ public final class WikipediaDatasetCreat
Path outPath = new Path(output);
FileOutputFormat.setOutputPath(job, outPath);
HadoopUtil.overwriteOutput(outPath);
-
- Set<String> categories = new HashSet<String>();
- for (String line : new FileLineIterable(new File(catFile))) {
- categories.add(line.trim().toLowerCase(Locale.ENGLISH));
- }
-
- DefaultStringifier<Set<String>> setStringifier =
- new DefaultStringifier<Set<String>>(conf, GenericsUtil.getClass(categories));
-
- String categoriesStr = setStringifier.toString(categories);
-
- conf.set("wikipedia.categories", categoriesStr);
job.waitForCompletion(true);
}