You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by ma...@apache.org on 2017/11/06 16:08:39 UTC
[nutch] branch master updated: NUTCH-2420 Bug in variable generate.max.count and fetcher.server.delay
This is an automated email from the ASF dual-hosted git repository.
markus pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/nutch.git
The following commit(s) were added to refs/heads/master by this push:
new 6199492 NUTCH-2420 Bug in variable generate.max.count and fetcher.server.delay
6199492 is described below
commit 6199492f5e1e8811022257c88dbf63f1e1c739d0
Author: Markus Jelsma <ma...@apache.org>
AuthorDate: Mon Nov 6 17:08:09 2017 +0100
NUTCH-2420 Bug in variable generate.max.count and fetcher.server.delay
---
src/java/org/apache/nutch/crawl/Generator.java | 16 ++++++++++++----
1 file changed, 12 insertions(+), 4 deletions(-)
diff --git a/src/java/org/apache/nutch/crawl/Generator.java b/src/java/org/apache/nutch/crawl/Generator.java
index 21607ec..e5f4831 100644
--- a/src/java/org/apache/nutch/crawl/Generator.java
+++ b/src/java/org/apache/nutch/crawl/Generator.java
@@ -179,11 +179,16 @@ public class Generator extends NutchTool implements Tool {
segCounts = new int[maxNumSegments];
if (job.get(GENERATOR_HOSTDB) != null) {
+ maxCountExpr = JexlUtil.parseExpression(job.get(GENERATOR_MAX_COUNT_EXPR, null));
+ fetchDelayExpr = JexlUtil.parseExpression(job.get(GENERATOR_FETCH_DELAY_EXPR, null));
+ }
+ }
+
+ public void open() {
+ if (conf.get(GENERATOR_HOSTDB) != null) {
try {
- Path path = new Path(job.get(GENERATOR_HOSTDB), "current");
- hostdbReaders = SequenceFileOutputFormat.getReaders(job, path);
- maxCountExpr = JexlUtil.parseExpression(job.get(GENERATOR_MAX_COUNT_EXPR, null));
- fetchDelayExpr = JexlUtil.parseExpression(job.get(GENERATOR_FETCH_DELAY_EXPR, null));
+ Path path = new Path(conf.get(GENERATOR_HOSTDB), "current");
+ hostdbReaders = SequenceFileOutputFormat.getReaders(conf, path);
} catch (IOException e) {
LOG.error("Error reading HostDB because {}", e.getMessage());
}
@@ -287,14 +292,17 @@ public class Generator extends NutchTool implements Tool {
Text key = new Text();
HostDatum value = new HostDatum();
+ open();
for (int i = 0; i < hostdbReaders.length; i++) {
while (hostdbReaders[i].next(key, value)) {
if (host.equals(key.toString())) {
+ close();
return value;
}
}
}
+ close();
return null;
}
--
To stop receiving notification emails like this one, please contact
"commits@nutch.apache.org" <co...@nutch.apache.org>.