You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by sn...@apache.org on 2014/12/08 20:36:22 UTC
svn commit: r1643893 - in /nutch/branches/2.x: CHANGES.txt
src/java/org/apache/nutch/crawl/GeneratorJob.java
Author: snagel
Date: Mon Dec 8 19:36:22 2014
New Revision: 1643893
URL: http://svn.apache.org/r1643893
Log:
NUTCH-1778 Generator not logging number of URLs in batch correctly
Modified:
nutch/branches/2.x/CHANGES.txt
nutch/branches/2.x/src/java/org/apache/nutch/crawl/GeneratorJob.java
Modified: nutch/branches/2.x/CHANGES.txt
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/CHANGES.txt?rev=1643893&r1=1643892&r2=1643893&view=diff
==============================================================================
--- nutch/branches/2.x/CHANGES.txt (original)
+++ nutch/branches/2.x/CHANGES.txt Mon Dec 8 19:36:22 2014
@@ -2,6 +2,8 @@ Nutch Change Log
Current Development 2.3-SNAPSHOT
+* NUTCH-1778 Generator not logging number of URLs in batch correctly (jnioche via snagel)
+
* NUTCH-1877 Suffix URL filter to ignore query string by default (markus via snagel)
* NUTCH-1825 protocol-http may hang for certain web pages (Phu Kieu via snagel)
Modified: nutch/branches/2.x/src/java/org/apache/nutch/crawl/GeneratorJob.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/java/org/apache/nutch/crawl/GeneratorJob.java?rev=1643893&r1=1643892&r2=1643893&view=diff
==============================================================================
--- nutch/branches/2.x/src/java/org/apache/nutch/crawl/GeneratorJob.java (original)
+++ nutch/branches/2.x/src/java/org/apache/nutch/crawl/GeneratorJob.java Mon Dec 8 19:36:22 2014
@@ -61,6 +61,7 @@ public class GeneratorJob extends NutchT
public static final String GENERATOR_DELAY = "crawl.gen.delay";
public static final String GENERATOR_RANDOM_SEED = "generate.partition.seed";
public static final String BATCH_ID = "generate.batch.id";
+ public static final String GENERATE_COUNT = "generate.count";
private static final Set<WebPage.Field> FIELDS = new HashSet<WebPage.Field>();
@@ -204,6 +205,9 @@ public class GeneratorJob extends NutchT
currentJob.waitForCompletion(true);
ToolUtil.recordJobStatus(null, currentJob, results);
results.put(BATCH_ID, getConf().get(BATCH_ID));
+ long generateCount = currentJob.getCounters()
+ .findCounter("Generator", "GENERATE_MARK").getValue();
+ results.put(GENERATE_COUNT, generateCount);
return results;
}
@@ -225,15 +229,16 @@ public class GeneratorJob extends NutchT
if (topN != Long.MAX_VALUE) {
LOG.info("GeneratorJob: topN: " + topN);
}
- run(ToolUtil.toArgMap(
+ Map<String,Object> results = run(ToolUtil.toArgMap(
Nutch.ARG_TOPN, topN,
Nutch.ARG_CURTIME, curTime,
Nutch.ARG_FILTER, filter,
Nutch.ARG_NORMALIZE, norm));
String batchId = getConf().get(BATCH_ID);
long finish = System.currentTimeMillis();
+ long generateCount = (Long) results.get(GENERATE_COUNT);
LOG.info("GeneratorJob: finished at " + sdf.format(finish) + ", time elapsed: " + TimingUtil.elapsedTime(start, finish));
- LOG.info("GeneratorJob: generated batch id: " + batchId + " containing " + GeneratorReducer.count + " URLs");
+ LOG.info("GeneratorJob: generated batch id: " + batchId + " containing " + generateCount + " URLs");
return batchId;
}