You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by le...@apache.org on 2012/06/14 14:34:09 UTC
svn commit: r1350213 - in /nutch/branches/nutchgora: CHANGES.txt
src/java/org/apache/nutch/fetcher/FetcherJob.java
src/java/org/apache/nutch/parse/ParserJob.java
Author: lewismc
Date: Thu Jun 14 12:34:08 2012
New Revision: 1350213
URL: http://svn.apache.org/viewvc?rev=1350213&view=rev
Log:
-force and -resume arguments being ignored in ParserJob NUTCH-1392
Modified:
nutch/branches/nutchgora/CHANGES.txt
nutch/branches/nutchgora/src/java/org/apache/nutch/fetcher/FetcherJob.java
nutch/branches/nutchgora/src/java/org/apache/nutch/parse/ParserJob.java
Modified: nutch/branches/nutchgora/CHANGES.txt
URL: http://svn.apache.org/viewvc/nutch/branches/nutchgora/CHANGES.txt?rev=1350213&r1=1350212&r2=1350213&view=diff
==============================================================================
--- nutch/branches/nutchgora/CHANGES.txt (original)
+++ nutch/branches/nutchgora/CHANGES.txt Thu Jun 14 12:34:08 2012
@@ -3,6 +3,8 @@ Nutch Change Log
Release 2.0 (08/06/2012) ddmmyyyy
Full Jira report - https://issues.apache.org/jira/secure/ReleaseNote.jspa?projectId=10680&version=12314893
+* NUTCH-1392 -force and -resume arguments being ignored in ParserJob (ferdy via lewismc)
+
* NUTCH-1379 NPE when reprUrl is null in ParseUtil (ferdy)
* NUTCH-1378 HostDb NullPointerException (ferdy)
Modified: nutch/branches/nutchgora/src/java/org/apache/nutch/fetcher/FetcherJob.java
URL: http://svn.apache.org/viewvc/nutch/branches/nutchgora/src/java/org/apache/nutch/fetcher/FetcherJob.java?rev=1350213&r1=1350212&r2=1350213&view=diff
==============================================================================
--- nutch/branches/nutchgora/src/java/org/apache/nutch/fetcher/FetcherJob.java (original)
+++ nutch/branches/nutchgora/src/java/org/apache/nutch/fetcher/FetcherJob.java Thu Jun 14 12:34:08 2012
@@ -265,7 +265,7 @@ public class FetcherJob extends NutchTool
String batchId;
String usage = "Usage: FetcherJob (<batchId> | -all) [-crawlId <id>] " +
- "[-threads N] [-parse] \n \t \t [-resume] [-numTasks N]\n" +
+ "[-threads N] \n \t \t [-resume] [-numTasks N]\n" +
" <batchId> - crawl identifier returned by Generator, or -all for all \n \t \t generated batchId-s\n" +
" -crawlId <id> - the id to prefix the schemas to operate on, \n \t \t (default: storage.crawl.id)\n" +
" -threads N - number of fetching threads per task\n" +
Modified: nutch/branches/nutchgora/src/java/org/apache/nutch/parse/ParserJob.java
URL: http://svn.apache.org/viewvc/nutch/branches/nutchgora/src/java/org/apache/nutch/parse/ParserJob.java?rev=1350213&r1=1350212&r2=1350213&view=diff
==============================================================================
--- nutch/branches/nutchgora/src/java/org/apache/nutch/parse/ParserJob.java (original)
+++ nutch/branches/nutchgora/src/java/org/apache/nutch/parse/ParserJob.java Thu Jun 14 12:34:08 2012
@@ -231,6 +231,13 @@ public class ParserJob extends NutchTool
if (force != null) {
getConf().setBoolean(FORCE_KEY, force);
}
+ LOG.info("ParserJob: resuming:\t" + getConf().getBoolean(RESUME_KEY, false));
+ LOG.info("ParserJob: forced reparse:\t" + getConf().getBoolean(FORCE_KEY, false));
+ if (batchId == null || batchId.equals(Nutch.ALL_BATCH_ID_STR)) {
+ LOG.info("ParserJob: parsing all");
+ } else {
+ LOG.info("ParserJob: batchId:\t" + batchId);
+ }
currentJob = new NutchJob(getConf(), "parse");
Collection<WebPage.Field> fields = getFields(currentJob);
@@ -247,13 +254,6 @@ public class ParserJob extends NutchTool
public int parse(String batchId, boolean shouldResume, boolean force) throws Exception {
LOG.info("ParserJob: starting");
- LOG.info("ParserJob: resuming:\t" + getConf().getBoolean(RESUME_KEY, false));
- LOG.info("ParserJob: forced reparse:\t" + getConf().getBoolean(FORCE_KEY, false));
- if (batchId == null || batchId.equals(Nutch.ALL_BATCH_ID_STR)) {
- LOG.info("ParserJob: parsing all");
- } else {
- LOG.info("ParserJob: batchId:\t" + batchId);
- }
run(ToolUtil.toArgMap(
Nutch.ARG_BATCH, batchId,
Nutch.ARG_RESUME, shouldResume,