You are viewing a plain text version of this content. The canonical link for it was not preserved in this plain-text copy.
Posted to commits@nutch.apache.org by ab...@apache.org on 2006/09/23 21:00:14 UTC
svn commit: r449289 - in /lucene/nutch/branches/branch-0.8: CHANGES.txt
src/java/org/apache/nutch/crawl/Crawl.java
src/java/org/apache/nutch/fetcher/Fetcher.java
Author: ab
Date: Sat Sep 23 12:00:13 2006
New Revision: 449289
URL: http://svn.apache.org/viewvc?view=rev&rev=449289
Log:
NUTCH-337: obey "fetcher.parse" property if -noParsing is not specified.
Modified:
lucene/nutch/branches/branch-0.8/CHANGES.txt
lucene/nutch/branches/branch-0.8/src/java/org/apache/nutch/crawl/Crawl.java
lucene/nutch/branches/branch-0.8/src/java/org/apache/nutch/fetcher/Fetcher.java
Modified: lucene/nutch/branches/branch-0.8/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/nutch/branches/branch-0.8/CHANGES.txt?view=diff&rev=449289&r1=449288&r2=449289
==============================================================================
--- lucene/nutch/branches/branch-0.8/CHANGES.txt (original)
+++ lucene/nutch/branches/branch-0.8/CHANGES.txt Sat Sep 23 12:00:13 2006
@@ -35,6 +35,9 @@
11. NUTCH-336 - Differentiate between newly discovered pages and newly
injected pages (Chris Schneider via ab) NOTE: this changes the
scoring API, filter implementations need to be updated.
+
+12. NUTCH-337 - Fetcher ignores the fetcher.parse value (Stefan Groschupf
+ via ab)
Release 0.8 - 2006-07-25
Modified: lucene/nutch/branches/branch-0.8/src/java/org/apache/nutch/crawl/Crawl.java
URL: http://svn.apache.org/viewvc/lucene/nutch/branches/branch-0.8/src/java/org/apache/nutch/crawl/Crawl.java?view=diff&rev=449289&r1=449288&r2=449289
==============================================================================
--- lucene/nutch/branches/branch-0.8/src/java/org/apache/nutch/crawl/Crawl.java (original)
+++ lucene/nutch/branches/branch-0.8/src/java/org/apache/nutch/crawl/Crawl.java Sat Sep 23 12:00:13 2006
@@ -108,7 +108,7 @@
Path segment =
new Generator(job).generate(crawlDb, segments, -1,
topN, System.currentTimeMillis());
- new Fetcher(job).fetch(segment, threads, Fetcher.isParsing(job)); // fetch it
+ new Fetcher(job).fetch(segment, threads); // fetch it
if (!Fetcher.isParsing(job)) {
new ParseSegment(job).parse(segment); // parse it, if needed
}
Modified: lucene/nutch/branches/branch-0.8/src/java/org/apache/nutch/fetcher/Fetcher.java
URL: http://svn.apache.org/viewvc/lucene/nutch/branches/branch-0.8/src/java/org/apache/nutch/fetcher/Fetcher.java?view=diff&rev=449289&r1=449288&r2=449289
==============================================================================
--- lucene/nutch/branches/branch-0.8/src/java/org/apache/nutch/fetcher/Fetcher.java (original)
+++ lucene/nutch/branches/branch-0.8/src/java/org/apache/nutch/fetcher/Fetcher.java Sat Sep 23 12:00:13 2006
@@ -405,7 +405,7 @@
}
- public void fetch(Path segment, int threads, boolean parsing)
+ public void fetch(Path segment, int threads)
throws IOException {
if (LOG.isInfoEnabled()) {
@@ -418,7 +418,6 @@
job.setInt("fetcher.threads.fetch", threads);
job.set(SEGMENT_NAME_KEY, segment.getName());
- job.setBoolean("fetcher.parse", parsing);
// for politeness, don't permit parallel execution of a single task
job.setSpeculativeExecution(false);
@@ -469,7 +468,7 @@
}
Fetcher fetcher = new Fetcher(conf); // make a Fetcher
- fetcher.fetch(segment, threads, parsing); // run the Fetcher
+ fetcher.fetch(segment, threads); // run the Fetcher
}
}