You are viewing a plain text version of this content. The hyperlink to the canonical version was not preserved in this plain text copy.
Posted to commits@nutch.apache.org by ab...@apache.org on 2006/09/23 21:00:14 UTC

svn commit: r449289 - in /lucene/nutch/branches/branch-0.8: CHANGES.txt src/java/org/apache/nutch/crawl/Crawl.java src/java/org/apache/nutch/fetcher/Fetcher.java

Author: ab
Date: Sat Sep 23 12:00:13 2006
New Revision: 449289

URL: http://svn.apache.org/viewvc?view=rev&rev=449289
Log:
NUTCH-337: obey "fetcher.parse" property if -noParsing is not specified.

Modified:
    lucene/nutch/branches/branch-0.8/CHANGES.txt
    lucene/nutch/branches/branch-0.8/src/java/org/apache/nutch/crawl/Crawl.java
    lucene/nutch/branches/branch-0.8/src/java/org/apache/nutch/fetcher/Fetcher.java

Modified: lucene/nutch/branches/branch-0.8/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/nutch/branches/branch-0.8/CHANGES.txt?view=diff&rev=449289&r1=449288&r2=449289
==============================================================================
--- lucene/nutch/branches/branch-0.8/CHANGES.txt (original)
+++ lucene/nutch/branches/branch-0.8/CHANGES.txt Sat Sep 23 12:00:13 2006
@@ -35,6 +35,9 @@
 11. NUTCH-336 - Differentiate between newly discovered pages and newly
     injected pages (Chris Schneider via ab) NOTE: this changes the
     scoring API, filter implementations need to be updated.
+
+12. NUTCH-337 - Fetcher ignores the fetcher.parse value (Stefan Groschupf
+    via ab)
     
 Release 0.8 - 2006-07-25
 

Modified: lucene/nutch/branches/branch-0.8/src/java/org/apache/nutch/crawl/Crawl.java
URL: http://svn.apache.org/viewvc/lucene/nutch/branches/branch-0.8/src/java/org/apache/nutch/crawl/Crawl.java?view=diff&rev=449289&r1=449288&r2=449289
==============================================================================
--- lucene/nutch/branches/branch-0.8/src/java/org/apache/nutch/crawl/Crawl.java (original)
+++ lucene/nutch/branches/branch-0.8/src/java/org/apache/nutch/crawl/Crawl.java Sat Sep 23 12:00:13 2006
@@ -108,7 +108,7 @@
       Path segment =
         new Generator(job).generate(crawlDb, segments, -1,
                                      topN, System.currentTimeMillis());
-      new Fetcher(job).fetch(segment, threads, Fetcher.isParsing(job));  // fetch it
+      new Fetcher(job).fetch(segment, threads);  // fetch it
       if (!Fetcher.isParsing(job)) {
         new ParseSegment(job).parse(segment);    // parse it, if needed
       }

Modified: lucene/nutch/branches/branch-0.8/src/java/org/apache/nutch/fetcher/Fetcher.java
URL: http://svn.apache.org/viewvc/lucene/nutch/branches/branch-0.8/src/java/org/apache/nutch/fetcher/Fetcher.java?view=diff&rev=449289&r1=449288&r2=449289
==============================================================================
--- lucene/nutch/branches/branch-0.8/src/java/org/apache/nutch/fetcher/Fetcher.java (original)
+++ lucene/nutch/branches/branch-0.8/src/java/org/apache/nutch/fetcher/Fetcher.java Sat Sep 23 12:00:13 2006
@@ -405,7 +405,7 @@
     
   }
 
-  public void fetch(Path segment, int threads, boolean parsing)
+  public void fetch(Path segment, int threads)
     throws IOException {
 
     if (LOG.isInfoEnabled()) {
@@ -418,7 +418,6 @@
 
     job.setInt("fetcher.threads.fetch", threads);
     job.set(SEGMENT_NAME_KEY, segment.getName());
-    job.setBoolean("fetcher.parse", parsing);
 
     // for politeness, don't permit parallel execution of a single task
     job.setSpeculativeExecution(false);
@@ -469,7 +468,7 @@
     }
     Fetcher fetcher = new Fetcher(conf);          // make a Fetcher
     
-    fetcher.fetch(segment, threads, parsing);              // run the Fetcher
+    fetcher.fetch(segment, threads);              // run the Fetcher
 
   }
 }