You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by cu...@apache.org on 2005/10/18 23:24:15 UTC
svn commit: r326252 -
/lucene/nutch/branches/mapred/src/java/org/apache/nutch/crawl/Generator.java
Author: cutting
Date: Tue Oct 18 14:24:12 2005
New Revision: 326252
URL: http://svn.apache.org/viewcvs?rev=326252&view=rev
Log:
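Delay reading numReduceTasks until the job is running, so that the
per-reducer limit is computed in configure() rather than at submission
time. This fixes a bug that occurs when LocalJobRunner forces
numReduceTasks to 1 after the job is submitted.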
Modified:
lucene/nutch/branches/mapred/src/java/org/apache/nutch/crawl/Generator.java
Modified: lucene/nutch/branches/mapred/src/java/org/apache/nutch/crawl/Generator.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/java/org/apache/nutch/crawl/Generator.java?rev=326252&r1=326251&r2=326252&view=diff
==============================================================================
--- lucene/nutch/branches/mapred/src/java/org/apache/nutch/crawl/Generator.java (original)
+++ lucene/nutch/branches/mapred/src/java/org/apache/nutch/crawl/Generator.java Tue Oct 18 14:24:12 2005
@@ -45,7 +45,7 @@
public void configure(JobConf job) {
curTime = job.getLong("crawl.gen.curTime", System.currentTimeMillis());
- limit = job.getLong("crawl.gen.limit", Long.MAX_VALUE);
+ limit = job.getLong("crawl.topN",Long.MAX_VALUE)/job.getNumReduceTasks();
maxPerHost = job.getInt("generate.max.per.host", -1);
}
@@ -173,7 +173,7 @@
}
job.setLong("crawl.gen.curTime", curTime);
- job.setLong("crawl.gen.limit", topN / job.getNumReduceTasks());
+ job.setLong("crawl.topN", topN);
job.setInputDir(new File(dbDir, CrawlDatum.DB_DIR_NAME));
job.setInputFormat(SequenceFileInputFormat.class);