You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by do...@apache.org on 2009/06/07 19:12:19 UTC
svn commit: r782412 - in /lucene/nutch/trunk: CHANGES.txt
src/java/org/apache/nutch/crawl/Crawl.java
src/java/org/apache/nutch/util/NutchConfiguration.java
Author: dogacan
Date: Sun Jun 7 17:12:18 2009
New Revision: 782412
URL: http://svn.apache.org/viewvc?rev=782412&view=rev
Log:
NUTCH-735 - crawl-tool.xml must be read before nutch-site.xml when invoked using crawl command. Patch by Susam Pal.
Modified:
lucene/nutch/trunk/CHANGES.txt
lucene/nutch/trunk/src/java/org/apache/nutch/crawl/Crawl.java
lucene/nutch/trunk/src/java/org/apache/nutch/util/NutchConfiguration.java
Modified: lucene/nutch/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/CHANGES.txt?rev=782412&r1=782411&r2=782412&view=diff
==============================================================================
--- lucene/nutch/trunk/CHANGES.txt (original)
+++ lucene/nutch/trunk/CHANGES.txt Sun Jun 7 17:12:18 2009
@@ -1,5 +1,10 @@
Nutch Change Log
+Unreleased Changes
+
+ 1. NUTCH-735 - crawl-tool.xml must be read before nutch-site.xml when
+ invoked using crawl command (Susam Pal via dogacan)
+
Release 1.0 - 2009-03-23
1. NUTCH-474 - Fetcher2 crawlDelay and blocking fix (Dogacan Guney via ab)
Modified: lucene/nutch/trunk/src/java/org/apache/nutch/crawl/Crawl.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/crawl/Crawl.java?rev=782412&r1=782411&r2=782412&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/crawl/Crawl.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/crawl/Crawl.java Sun Jun 7 17:12:18 2009
@@ -54,8 +54,7 @@
return;
}
- Configuration conf = NutchConfiguration.create();
- conf.addResource("crawl-tool.xml");
+ Configuration conf = NutchConfiguration.createCrawlConfiguration();
JobConf job = new NutchJob(conf);
Path rootUrlDir = null;
Modified: lucene/nutch/trunk/src/java/org/apache/nutch/util/NutchConfiguration.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/util/NutchConfiguration.java?rev=782412&r1=782411&r2=782412&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/util/NutchConfiguration.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/util/NutchConfiguration.java Sun Jun 7 17:12:18 2009
@@ -47,7 +47,17 @@
/** Create a {@link Configuration} for Nutch. */
public static Configuration create() {
Configuration conf = new Configuration();
- addNutchResources(conf);
+ addNutchResources(conf, false);
+ return conf;
+ }
+
+ /**
+ * Create a {@link Configuration} for Nutch invoked with the command
+ * line crawl command, i.e. bin/nutch crawl ...
+ */
+ public static Configuration createCrawlConfiguration() {
+ Configuration conf = new Configuration();
+ addNutchResources(conf, true);
return conf;
}
@@ -79,12 +89,23 @@
return conf;
}
- /** Add the standard Nutch resources to {@link Configuration}. */
- public static Configuration addNutchResources(Configuration conf) {
+ /**
+ * Add the standard Nutch resources to {@link Configuration}.
+ *
+ * @param conf Configuration object to which
+ * configuration is to be added.
+ * @param crawlConfiguration Whether configuration for command line
+ * crawl using 'bin/nutch crawl' command
+ * should be added.
+ */
+ private static Configuration addNutchResources(Configuration conf,
+ boolean crawlConfiguration) {
conf.addResource("nutch-default.xml");
+ if (crawlConfiguration) {
+ conf.addResource("crawl-tool.xml");
+ }
conf.addResource("nutch-site.xml");
return conf;
}
-
}