You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by ab...@apache.org on 2009/10/09 17:56:02 UTC
svn commit: r823600 - in /lucene/nutch/trunk: CHANGES.txt src/java/org/apache/nutch/fetcher/Fetcher.java
Author: ab
Date: Fri Oct 9 15:56:02 2009
New Revision: 823600
URL: http://svn.apache.org/viewvc?rev=823600&view=rev
Log:
NUTCH-679 Fetcher2 implementing Tool.
Modified:
lucene/nutch/trunk/CHANGES.txt
lucene/nutch/trunk/src/java/org/apache/nutch/fetcher/Fetcher.java
Modified: lucene/nutch/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/CHANGES.txt?rev=823600&r1=823599&r2=823600&view=diff
==============================================================================
--- lucene/nutch/trunk/CHANGES.txt (original)
+++ lucene/nutch/trunk/CHANGES.txt Fri Oct 9 15:56:02 2009
@@ -26,6 +26,8 @@
* NUTCH-756 - CrawlDatum.set() does not reset Metadata if it is null (Julien Nioche
via ab)
+* NUTCH-679 - Fetcher2 implementing Tool (Julien Nioche via ab)
+
Release 1.0 - 2009-03-23
1. NUTCH-474 - Fetcher2 crawlDelay and blocking fix (Dogacan Guney via ab)
Modified: lucene/nutch/trunk/src/java/org/apache/nutch/fetcher/Fetcher.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/fetcher/Fetcher.java?rev=823600&r1=823599&r2=823600&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/fetcher/Fetcher.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/fetcher/Fetcher.java Fri Oct 9 15:56:02 2009
@@ -35,6 +35,8 @@
import org.apache.hadoop.conf.*;
import org.apache.hadoop.mapred.*;
import org.apache.hadoop.util.StringUtils;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
import org.apache.nutch.crawl.CrawlDatum;
import org.apache.nutch.crawl.NutchWritable;
@@ -83,7 +85,7 @@
*
* @author Andrzej Bialecki
*/
-public class Fetcher extends Configured implements
+public class Fetcher extends Configured implements Tool,
MapRunnable<Text, CrawlDatum, Text, NutchWritable> {
public static final int PERM_REFRESH_TIME = 5;
@@ -972,19 +974,22 @@
/** Run the fetcher. */
public static void main(String[] args) throws Exception {
+ int res = ToolRunner.run(NutchConfiguration.create(), new Fetcher(), args);
+ System.exit(res);
+ }
+
+ public int run(String[] args) throws Exception {
String usage = "Usage: Fetcher <segment> [-threads n] [-noParsing]";
if (args.length < 1) {
System.err.println(usage);
- System.exit(-1);
+ return -1;
}
Path segment = new Path(args[0]);
- Configuration conf = NutchConfiguration.create();
-
- int threads = conf.getInt("fetcher.threads.fetch", 10);
+ int threads = getConf().getInt("fetcher.threads.fetch", 10);
boolean parsing = true;
for (int i = 1; i < args.length; i++) { // parse command line
@@ -993,13 +998,17 @@
} else if (args[i].equals("-noParsing")) parsing = false;
}
- conf.setInt("fetcher.threads.fetch", threads);
+ getConf().setInt("fetcher.threads.fetch", threads);
if (!parsing) {
- conf.setBoolean("fetcher.parse", parsing);
+ getConf().setBoolean("fetcher.parse", parsing);
+ }
+ try {
+ fetch(segment, threads, parsing);
+ return 0;
+ } catch (Exception e) {
+ LOG.fatal("Fetcher: " + StringUtils.stringifyException(e));
+ return -1;
}
- Fetcher fetcher = new Fetcher(conf); // make a Fetcher
-
- fetcher.fetch(segment, threads, parsing); // run the Fetcher
}