You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by le...@apache.org on 2012/06/14 14:34:09 UTC

svn commit: r1350213 - in /nutch/branches/nutchgora: CHANGES.txt src/java/org/apache/nutch/fetcher/FetcherJob.java src/java/org/apache/nutch/parse/ParserJob.java

Author: lewismc
Date: Thu Jun 14 12:34:08 2012
New Revision: 1350213

URL: http://svn.apache.org/viewvc?rev=1350213&view=rev
Log:
Fix -force and -resume arguments being ignored in ParserJob (NUTCH-1392)

Modified:
    nutch/branches/nutchgora/CHANGES.txt
    nutch/branches/nutchgora/src/java/org/apache/nutch/fetcher/FetcherJob.java
    nutch/branches/nutchgora/src/java/org/apache/nutch/parse/ParserJob.java

Modified: nutch/branches/nutchgora/CHANGES.txt
URL: http://svn.apache.org/viewvc/nutch/branches/nutchgora/CHANGES.txt?rev=1350213&r1=1350212&r2=1350213&view=diff
==============================================================================
--- nutch/branches/nutchgora/CHANGES.txt (original)
+++ nutch/branches/nutchgora/CHANGES.txt Thu Jun 14 12:34:08 2012
@@ -3,6 +3,8 @@ Nutch Change Log
 Release 2.0 (08/06/2012) ddmmyyy
 Full Jira report - https://issues.apache.org/jira/secure/ReleaseNote.jspa?projectId=10680&version=12314893
 
+* NUTCH-1392 -force and -resume arguments being ignored in ParserJob (ferdy via lewismc)
+
 * NUTCH-1379 NPE when reprUrl is null in ParseUtil (ferdy)
 
 * NUTCH-1378 HostDb NullPointerException (ferdy)

Modified: nutch/branches/nutchgora/src/java/org/apache/nutch/fetcher/FetcherJob.java
URL: http://svn.apache.org/viewvc/nutch/branches/nutchgora/src/java/org/apache/nutch/fetcher/FetcherJob.java?rev=1350213&r1=1350212&r2=1350213&view=diff
==============================================================================
--- nutch/branches/nutchgora/src/java/org/apache/nutch/fetcher/FetcherJob.java (original)
+++ nutch/branches/nutchgora/src/java/org/apache/nutch/fetcher/FetcherJob.java Thu Jun 14 12:34:08 2012
@@ -265,7 +265,7 @@ public class FetcherJob extends NutchToo
     String batchId;
 
     String usage = "Usage: FetcherJob (<batchId> | -all) [-crawlId <id>] " +
-      "[-threads N] [-parse] \n \t \t  [-resume] [-numTasks N]\n" +
+      "[-threads N] \n \t \t  [-resume] [-numTasks N]\n" +
       "    <batchId>     - crawl identifier returned by Generator, or -all for all \n \t \t    generated batchId-s\n" +
       "    -crawlId <id> - the id to prefix the schemas to operate on, \n \t \t    (default: storage.crawl.id)\n" +
       "    -threads N    - number of fetching threads per task\n" +

Modified: nutch/branches/nutchgora/src/java/org/apache/nutch/parse/ParserJob.java
URL: http://svn.apache.org/viewvc/nutch/branches/nutchgora/src/java/org/apache/nutch/parse/ParserJob.java?rev=1350213&r1=1350212&r2=1350213&view=diff
==============================================================================
--- nutch/branches/nutchgora/src/java/org/apache/nutch/parse/ParserJob.java (original)
+++ nutch/branches/nutchgora/src/java/org/apache/nutch/parse/ParserJob.java Thu Jun 14 12:34:08 2012
@@ -231,6 +231,13 @@ public class ParserJob extends NutchTool
     if (force != null) {
       getConf().setBoolean(FORCE_KEY, force);
     }
+    LOG.info("ParserJob: resuming:\t" + getConf().getBoolean(RESUME_KEY, false));
+    LOG.info("ParserJob: forced reparse:\t" + getConf().getBoolean(FORCE_KEY, false));
+    if (batchId == null || batchId.equals(Nutch.ALL_BATCH_ID_STR)) {
+      LOG.info("ParserJob: parsing all");
+    } else {
+      LOG.info("ParserJob: batchId:\t" + batchId);
+    }
     currentJob = new NutchJob(getConf(), "parse");
     
     Collection<WebPage.Field> fields = getFields(currentJob);
@@ -247,13 +254,6 @@ public class ParserJob extends NutchTool
   public int parse(String batchId, boolean shouldResume, boolean force) throws Exception {
     LOG.info("ParserJob: starting");
 
-    LOG.info("ParserJob: resuming:\t" + getConf().getBoolean(RESUME_KEY, false));
-    LOG.info("ParserJob: forced reparse:\t" + getConf().getBoolean(FORCE_KEY, false));
-    if (batchId == null || batchId.equals(Nutch.ALL_BATCH_ID_STR)) {
-      LOG.info("ParserJob: parsing all");
-    } else {
-      LOG.info("ParserJob: batchId:\t" + batchId);
-    }
     run(ToolUtil.toArgMap(
         Nutch.ARG_BATCH, batchId,
         Nutch.ARG_RESUME, shouldResume,