You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by le...@apache.org on 2014/01/13 14:17:01 UTC
svn commit: r1557705 - in /nutch/branches/2.x: CHANGES.txt src/java/org/apache/nutch/crawl/DbUpdaterJob.java
Author: lewismc
Date: Mon Jan 13 13:17:00 2014
New Revision: 1557705
URL: http://svn.apache.org/r1557705
Log:
NUTCH-1667 Updatedb always ignores batchId
Modified:
nutch/branches/2.x/CHANGES.txt
nutch/branches/2.x/src/java/org/apache/nutch/crawl/DbUpdaterJob.java
Modified: nutch/branches/2.x/CHANGES.txt
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/CHANGES.txt?rev=1557705&r1=1557704&r2=1557705&view=diff
==============================================================================
--- nutch/branches/2.x/CHANGES.txt (original)
+++ nutch/branches/2.x/CHANGES.txt Mon Jan 13 13:17:00 2014
@@ -2,6 +2,8 @@ Nutch Change Log
Current Development
+* NUTCH-1667 Updatedb always ignore batchId (Tien Nguyen Manh via lewismc)
+
* NUTCH-1695 NutchDocument.toString() (markus via lewismc)
* NUTCH-1696 Enable use of (Gora) SNAPSHOT dependencies (lewismc)
Modified: nutch/branches/2.x/src/java/org/apache/nutch/crawl/DbUpdaterJob.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/java/org/apache/nutch/crawl/DbUpdaterJob.java?rev=1557705&r1=1557704&r2=1557705&view=diff
==============================================================================
--- nutch/branches/2.x/src/java/org/apache/nutch/crawl/DbUpdaterJob.java (original)
+++ nutch/branches/2.x/src/java/org/apache/nutch/crawl/DbUpdaterJob.java Mon Jan 13 13:17:00 2014
@@ -80,10 +80,7 @@ public class DbUpdaterJob extends NutchT
String batchId = (String)args.get(Nutch.ARG_BATCH);
numJobs = 1;
currentJobNum = 0;
- currentJob = new NutchJob(getConf(), "update-table");
- if (crawlId != null) {
- currentJob.getConfiguration().set(Nutch.CRAWL_ID_KEY, crawlId);
- }
+
if (batchId == null) {
batchId = Nutch.ALL_BATCH_ID_STR;
}
@@ -93,6 +90,11 @@ public class DbUpdaterJob extends NutchT
HashSet<WebPage.Field> fields = new HashSet<WebPage.Field>(FIELDS);
fields.addAll(scoringFilters.getFields());
+ currentJob = new NutchJob(getConf(), "update-table");
+ if (crawlId != null) {
+ currentJob.getConfiguration().set(Nutch.CRAWL_ID_KEY, crawlId);
+ }
+
// Partition by {url}, sort by {url,score} and group by {url}.
// This ensures that the inlinks are sorted by score when they enter
// the reducer.