You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by sn...@apache.org on 2016/01/07 21:57:13 UTC

svn commit: r1723626 - in /nutch/branches/2.x: CHANGES.txt src/java/org/apache/nutch/crawl/GeneratorJob.java

Author: snagel
Date: Thu Jan  7 20:57:13 2016
New Revision: 1723626

URL: http://svn.apache.org/viewvc?rev=1723626&view=rev
Log:
NUTCH-2143 GeneratorJob ignores batch id passed as argument

Modified:
    nutch/branches/2.x/CHANGES.txt
    nutch/branches/2.x/src/java/org/apache/nutch/crawl/GeneratorJob.java

Modified: nutch/branches/2.x/CHANGES.txt
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/CHANGES.txt?rev=1723626&r1=1723625&r2=1723626&view=diff
==============================================================================
--- nutch/branches/2.x/CHANGES.txt (original)
+++ nutch/branches/2.x/CHANGES.txt Thu Jan  7 20:57:13 2016
@@ -3,6 +3,8 @@ Nutch Change Log
 Nutch 2.3.1 Release 22092015 (ddmmyyyy)
 Release Report - http://s.apache.org/nutch_2.3.1
 
+* NUTCH-2143 GeneratorJob ignores batch id passed as argument (liuqibj, lewismc, snagel)
+
 * NUTCH-2042 parse-html increase chunk size used to detect charset (snagel)
 
 * NUTCH-2107 plugin.xml to validate against plugin.dtd (snagel)

Modified: nutch/branches/2.x/src/java/org/apache/nutch/crawl/GeneratorJob.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/java/org/apache/nutch/crawl/GeneratorJob.java?rev=1723626&r1=1723625&r2=1723626&view=diff
==============================================================================
--- nutch/branches/2.x/src/java/org/apache/nutch/crawl/GeneratorJob.java (original)
+++ nutch/branches/2.x/src/java/org/apache/nutch/crawl/GeneratorJob.java Thu Jan  7 20:57:13 2016
@@ -163,17 +163,20 @@ public class GeneratorJob extends NutchT
     return fields;
   }
 
+  /** Generate a random batch id */
+  public static String randomBatchId() {
+    long curTime = System.currentTimeMillis();
+    int randomSeed = Math.abs(new Random().nextInt());
+    String batchId = (curTime / 1000) + "-" + randomSeed;
+    return batchId;
+  }
+  
   public Map<String, Object> run(Map<String, Object> args) throws Exception {
     String batchId = (String) args.get(Nutch.ARG_BATCH);
-    if (batchId != null) {
-      getConf().set(GeneratorJob.BATCH_ID, batchId);
-    } else {
-      // generate batchId
-      long curTime = System.currentTimeMillis();
-      int randomSeed = Math.abs(new Random().nextInt());
-      batchId = (curTime / 1000) + "-" + randomSeed;
-      getConf().set(BATCH_ID, batchId);
+    if (batchId == null) {
+      batchId = randomBatchId();
     }
+    getConf().set(BATCH_ID, batchId);
 
     // map to inverted subset due for fetch, sort by score
     Long topN = null;
@@ -249,10 +252,15 @@ public class GeneratorJob extends NutchT
     if (topN != Long.MAX_VALUE) {
       LOG.info("GeneratorJob: topN: " + topN);
     }
+    String batchId = getConf().get(BATCH_ID);
     Map<String, Object> results = run(ToolUtil.toArgMap(Nutch.ARG_TOPN, topN,
         Nutch.ARG_CURTIME, curTime, Nutch.ARG_FILTER, filter,
-        Nutch.ARG_NORMALIZE, norm));
-    String batchId = getConf().get(BATCH_ID);
+        Nutch.ARG_NORMALIZE, norm, Nutch.ARG_BATCH, batchId));
+    if (batchId == null) {
+      // use generated random batch id
+      batchId = (String) results.get(BATCH_ID);
+    }
+
     long finish = System.currentTimeMillis();
     long generateCount = (Long) results.get(GENERATE_COUNT);
     LOG.info("GeneratorJob: finished at " + sdf.format(finish)
@@ -290,11 +298,6 @@ public class GeneratorJob extends NutchT
     long curTime = System.currentTimeMillis(), topN = Long.MAX_VALUE;
     boolean filter = true, norm = true;
 
-    // generate batchId
-    int randomSeed = Math.abs(new Random().nextInt());
-    String batchId = (curTime / 1000) + "-" + randomSeed;
-    getConf().set(BATCH_ID, batchId);
-
     for (int i = 0; i < args.length; i++) {
       if ("-topN".equals(args[i])) {
         topN = Long.parseLong(args[++i]);
@@ -307,9 +310,9 @@ public class GeneratorJob extends NutchT
       } else if ("-adddays".equals(args[i])) {
         long numDays = Integer.parseInt(args[++i]);
         curTime += numDays * 1000L * 60 * 60 * 24;
-      } else if ("-batchId".equals(args[i]))
+      } else if ("-batchId".equals(args[i])) {
         getConf().set(BATCH_ID, args[++i]);
-      else {
+      } else {
         System.err.println("Unrecognized arg " + args[i]);
         return -1;
       }