You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by ab...@apache.org on 2009/10/09 17:56:02 UTC

svn commit: r823600 - in /lucene/nutch/trunk: CHANGES.txt src/java/org/apache/nutch/fetcher/Fetcher.java

Author: ab
Date: Fri Oct  9 15:56:02 2009
New Revision: 823600

URL: http://svn.apache.org/viewvc?rev=823600&view=rev
Log:
NUTCH-679 Fetcher2 implementing Tool.

Modified:
    lucene/nutch/trunk/CHANGES.txt
    lucene/nutch/trunk/src/java/org/apache/nutch/fetcher/Fetcher.java

Modified: lucene/nutch/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/CHANGES.txt?rev=823600&r1=823599&r2=823600&view=diff
==============================================================================
--- lucene/nutch/trunk/CHANGES.txt (original)
+++ lucene/nutch/trunk/CHANGES.txt Fri Oct  9 15:56:02 2009
@@ -26,6 +26,8 @@
 * NUTCH-756 - CrawlDatum.set() does not reset Metadata if it is null (Julien Nioche
   via ab)
 
+* NUTCH-679 - Fetcher2 implementing Tool (Julien Nioche via ab)
+
 Release 1.0 - 2009-03-23
 
  1. NUTCH-474 - Fetcher2 crawlDelay and blocking fix (Dogacan Guney via ab)

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/fetcher/Fetcher.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/fetcher/Fetcher.java?rev=823600&r1=823599&r2=823600&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/fetcher/Fetcher.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/fetcher/Fetcher.java Fri Oct  9 15:56:02 2009
@@ -35,6 +35,8 @@
 import org.apache.hadoop.conf.*;
 import org.apache.hadoop.mapred.*;
 import org.apache.hadoop.util.StringUtils;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
 
 import org.apache.nutch.crawl.CrawlDatum;
 import org.apache.nutch.crawl.NutchWritable;
@@ -83,7 +85,7 @@
  * 
  * @author Andrzej Bialecki
  */
-public class Fetcher extends Configured implements
+public class Fetcher extends Configured implements Tool,
     MapRunnable<Text, CrawlDatum, Text, NutchWritable> { 
 
   public static final int PERM_REFRESH_TIME = 5;
@@ -972,19 +974,22 @@
 
   /** Run the fetcher. */
   public static void main(String[] args) throws Exception {
+    int res = ToolRunner.run(NutchConfiguration.create(), new Fetcher(), args);
+    System.exit(res);
+  }
+  
+  public int run(String[] args) throws Exception {
 
     String usage = "Usage: Fetcher <segment> [-threads n] [-noParsing]";
 
     if (args.length < 1) {
       System.err.println(usage);
-      System.exit(-1);
+      return -1;
     }
       
     Path segment = new Path(args[0]);
 
-    Configuration conf = NutchConfiguration.create();
-
-    int threads = conf.getInt("fetcher.threads.fetch", 10);
+    int threads = getConf().getInt("fetcher.threads.fetch", 10);
     boolean parsing = true;
 
     for (int i = 1; i < args.length; i++) {       // parse command line
@@ -993,13 +998,17 @@
       } else if (args[i].equals("-noParsing")) parsing = false;
     }
 
-    conf.setInt("fetcher.threads.fetch", threads);
+    getConf().setInt("fetcher.threads.fetch", threads);
     if (!parsing) {
-      conf.setBoolean("fetcher.parse", parsing);
+      getConf().setBoolean("fetcher.parse", parsing);
+    }
+    try {
+      fetch(segment, threads, parsing);
+      return 0;
+    } catch (Exception e) {
+      LOG.fatal("Fetcher: " + StringUtils.stringifyException(e));
+      return -1;
     }
-    Fetcher fetcher = new Fetcher(conf);          // make a Fetcher
-    
-    fetcher.fetch(segment, threads, parsing);              // run the Fetcher
 
   }