You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by fe...@apache.org on 2013/05/20 15:44:12 UTC

svn commit: r1484482 - in /nutch/branches/2.x: CHANGES.txt src/java/org/apache/nutch/crawl/GeneratorJob.java

Author: fenglu
Date: Mon May 20 13:44:11 2013
New Revision: 1484482

URL: http://svn.apache.org/r1484482
Log:
NUTCH-1563 FetchSchedule#getFields is never used by GeneratorJob

Modified:
    nutch/branches/2.x/CHANGES.txt
    nutch/branches/2.x/src/java/org/apache/nutch/crawl/GeneratorJob.java

Modified: nutch/branches/2.x/CHANGES.txt
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/CHANGES.txt?rev=1484482&r1=1484481&r2=1484482&view=diff
==============================================================================
--- nutch/branches/2.x/CHANGES.txt (original)
+++ nutch/branches/2.x/CHANGES.txt Mon May 20 13:44:11 2013
@@ -2,6 +2,8 @@ Nutch Change Log
 
 Release 2.2 - Current Development
 
+* NUTCH-1563 FetchSchedule#getFields is never used by GeneratorJob (Feng)
+
 * NUTCH-1573 Upgrade to most recent JUnit 4.x to improve test flexibility (lewismc)
 
 * Added crawler-commons dependency in pom.xml (tejasp)

Modified: nutch/branches/2.x/src/java/org/apache/nutch/crawl/GeneratorJob.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/java/org/apache/nutch/crawl/GeneratorJob.java?rev=1484482&r1=1484481&r2=1484482&view=diff
==============================================================================
--- nutch/branches/2.x/src/java/org/apache/nutch/crawl/GeneratorJob.java (original)
+++ nutch/branches/2.x/src/java/org/apache/nutch/crawl/GeneratorJob.java Mon May 20 13:44:11 2013
@@ -24,7 +24,9 @@ import java.util.HashSet;
 import java.util.Map;
 import java.util.Random;
 import java.util.Set;
+import java.util.Collection;
 
+import org.apache.hadoop.mapreduce.Job;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.apache.hadoop.conf.Configuration;
@@ -152,6 +154,12 @@ public class GeneratorJob extends NutchT
     setConf(conf);
   }
 
+  public Collection<WebPage.Field> getFields(Job job) {
+    Collection<WebPage.Field> fields = new HashSet<WebPage.Field>(FIELDS);
+    fields.addAll(FetchScheduleFactory.getFetchSchedule(job.getConfiguration()).getFields());
+    return fields;
+  }
+
   public Map<String,Object> run(Map<String,Object> args) throws Exception {
     // map to inverted subset due for fetch, sort by score
     Long topN = (Long)args.get(Nutch.ARG_TOPN);
@@ -187,7 +195,8 @@ public class GeneratorJob extends NutchT
     numJobs = 1;
     currentJobNum = 0;
     currentJob = new NutchJob(getConf(), "generate: " + batchId);
-    StorageUtils.initMapperJob(currentJob, FIELDS, SelectorEntry.class,
+    Collection<WebPage.Field> fields = getFields(currentJob);
+    StorageUtils.initMapperJob(currentJob, fields, SelectorEntry.class,
         WebPage.class, GeneratorMapper.class, SelectorEntryPartitioner.class, true);
     StorageUtils.initReducerJob(currentJob, GeneratorReducer.class);
     currentJob.waitForCompletion(true);