You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by do...@apache.org on 2009/06/07 19:12:19 UTC

svn commit: r782412 - in /lucene/nutch/trunk: CHANGES.txt src/java/org/apache/nutch/crawl/Crawl.java src/java/org/apache/nutch/util/NutchConfiguration.java

Author: dogacan
Date: Sun Jun  7 17:12:18 2009
New Revision: 782412

URL: http://svn.apache.org/viewvc?rev=782412&view=rev
Log:
NUTCH-735 - crawl-tool.xml must be read before nutch-site.xml when invoked using crawl command. Patch by Susam Pal.

Modified:
    lucene/nutch/trunk/CHANGES.txt
    lucene/nutch/trunk/src/java/org/apache/nutch/crawl/Crawl.java
    lucene/nutch/trunk/src/java/org/apache/nutch/util/NutchConfiguration.java

Modified: lucene/nutch/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/CHANGES.txt?rev=782412&r1=782411&r2=782412&view=diff
==============================================================================
--- lucene/nutch/trunk/CHANGES.txt (original)
+++ lucene/nutch/trunk/CHANGES.txt Sun Jun  7 17:12:18 2009
@@ -1,5 +1,10 @@
 Nutch Change Log
 
+Unreleased Changes
+
+ 1. NUTCH-735 - crawl-tool.xml must be read before nutch-site.xml when
+    invoked using crawl command (Susam Pal via dogacan)
+
 Release 1.0 - 2009-03-23
 
  1. NUTCH-474 - Fetcher2 crawlDelay and blocking fix (Dogacan Guney via ab)

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/crawl/Crawl.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/crawl/Crawl.java?rev=782412&r1=782411&r2=782412&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/crawl/Crawl.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/crawl/Crawl.java Sun Jun  7 17:12:18 2009
@@ -54,8 +54,7 @@
       return;
     }
 
-    Configuration conf = NutchConfiguration.create();
-    conf.addResource("crawl-tool.xml");
+    Configuration conf = NutchConfiguration.createCrawlConfiguration();
     JobConf job = new NutchJob(conf);
 
     Path rootUrlDir = null;

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/util/NutchConfiguration.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/util/NutchConfiguration.java?rev=782412&r1=782411&r2=782412&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/util/NutchConfiguration.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/util/NutchConfiguration.java Sun Jun  7 17:12:18 2009
@@ -47,7 +47,17 @@
   /** Create a {@link Configuration} for Nutch. */
   public static Configuration create() {
     Configuration conf = new Configuration();
-    addNutchResources(conf);
+    addNutchResources(conf, false);
+    return conf;
+  }
+
+  /**
+   * Create a {@link Configuration} for Nutch invoked with the command
+   * line crawl command, i.e. bin/nutch crawl ...
+   */
+  public static Configuration createCrawlConfiguration() {
+    Configuration conf = new Configuration();
+    addNutchResources(conf, true);
     return conf;
   }
 
@@ -79,12 +89,23 @@
     return conf;
   }
   
-  /** Add the standard Nutch resources to {@link Configuration}. */
-  public static Configuration addNutchResources(Configuration conf) {
+  /**
+   * Add the standard Nutch resources to {@link Configuration}.
+   * 
+   * @param conf               Configuration object to which
+   *                           configuration is to be added.
+   * @param crawlConfiguration Whether configuration for command line
+   *                           crawl using 'bin/nutch crawl' command
+   *                           should be added.
+   */
+  private static Configuration addNutchResources(Configuration conf,
+                                                 boolean crawlConfiguration) {
     conf.addResource("nutch-default.xml");
+    if (crawlConfiguration) {
+      conf.addResource("crawl-tool.xml");
+    }
     conf.addResource("nutch-site.xml");
     return conf;
   }
-  
 }