You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by ma...@apache.org on 2017/11/06 16:08:39 UTC

[nutch] branch master updated: NUTCH-2420 Bug in variable generate.max.count and fetcher.server.delay

This is an automated email from the ASF dual-hosted git repository.

markus pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/nutch.git


The following commit(s) were added to refs/heads/master by this push:
     new 6199492  NUTCH-2420 Bug in variable generate.max.count and fetcher.server.delay
6199492 is described below

commit 6199492f5e1e8811022257c88dbf63f1e1c739d0
Author: Markus Jelsma <ma...@apache.org>
AuthorDate: Mon Nov 6 17:08:09 2017 +0100

    NUTCH-2420 Bug in variable generate.max.count and fetcher.server.delay
---
 src/java/org/apache/nutch/crawl/Generator.java | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/src/java/org/apache/nutch/crawl/Generator.java b/src/java/org/apache/nutch/crawl/Generator.java
index 21607ec..e5f4831 100644
--- a/src/java/org/apache/nutch/crawl/Generator.java
+++ b/src/java/org/apache/nutch/crawl/Generator.java
@@ -179,11 +179,16 @@ public class Generator extends NutchTool implements Tool {
       segCounts = new int[maxNumSegments];
       
       if (job.get(GENERATOR_HOSTDB) != null) {
+        maxCountExpr = JexlUtil.parseExpression(job.get(GENERATOR_MAX_COUNT_EXPR, null));
+        fetchDelayExpr = JexlUtil.parseExpression(job.get(GENERATOR_FETCH_DELAY_EXPR, null));
+      }
+    }
+    
+    public void open() {
+      if (conf.get(GENERATOR_HOSTDB) != null) {
         try {
-          Path path = new Path(job.get(GENERATOR_HOSTDB), "current");
-          hostdbReaders = SequenceFileOutputFormat.getReaders(job, path);
-          maxCountExpr = JexlUtil.parseExpression(job.get(GENERATOR_MAX_COUNT_EXPR, null));
-          fetchDelayExpr = JexlUtil.parseExpression(job.get(GENERATOR_FETCH_DELAY_EXPR, null));
+          Path path = new Path(conf.get(GENERATOR_HOSTDB), "current");
+          hostdbReaders = SequenceFileOutputFormat.getReaders(conf, path);
         } catch (IOException e) {
           LOG.error("Error reading HostDB because {}", e.getMessage());
         }
@@ -287,14 +292,17 @@ public class Generator extends NutchTool implements Tool {
       Text key = new Text();
       HostDatum value = new HostDatum();
       
+      open();
       for (int i = 0; i < hostdbReaders.length; i++) {
         while (hostdbReaders[i].next(key, value)) {
           if (host.equals(key.toString())) {
+            close();
             return value;
           }
         }
       }
       
+      close();
       return null;
     }
     

-- 
To stop receiving notification emails like this one, please contact
commits@nutch.apache.org.