You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by cu...@apache.org on 2005/11/07 18:53:57 UTC

svn commit: r331555 - in /lucene/nutch/branches/mapred/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient: HttpResponse.java RobotRulesParser.java

Author: cutting
Date: Mon Nov  7 09:53:54 2005
New Revision: 331555

URL: http://svn.apache.org/viewcvs?rev=331555&view=rev
Log:
NUTCH-124: Follow redirects when fetching robots.txt.

Modified:
    lucene/nutch/branches/mapred/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpResponse.java
    lucene/nutch/branches/mapred/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/RobotRulesParser.java

Modified: lucene/nutch/branches/mapred/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpResponse.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpResponse.java?rev=331555&r1=331554&r2=331555&view=diff
==============================================================================
--- lucene/nutch/branches/mapred/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpResponse.java (original)
+++ lucene/nutch/branches/mapred/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpResponse.java Mon Nov  7 09:53:54 2005
@@ -82,10 +82,14 @@
   }
 
   public HttpResponse(URL url) throws IOException {
+    this(url, false);
+  }
+
+  HttpResponse(URL url, boolean followRedirects) throws IOException {
     this.base = url.toString();
     this.orig = url.toString();
     GetMethod get = new GetMethod(this.orig);
-    get.setFollowRedirects(false);
+    get.setFollowRedirects(followRedirects);
     get.setRequestHeader("User-Agent", Http.AGENT_STRING);
     HttpMethodParams params = get.getParams();
     // some servers cannot digest the new protocol

Modified: lucene/nutch/branches/mapred/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/RobotRulesParser.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/RobotRulesParser.java?rev=331555&r1=331554&r2=331555&view=diff
==============================================================================
--- lucene/nutch/branches/mapred/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/RobotRulesParser.java (original)
+++ lucene/nutch/branches/mapred/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/RobotRulesParser.java Mon Nov  7 09:53:54 2005
@@ -379,7 +379,8 @@
     if (robotRules == null) {                     // cache miss
       LOG.fine("cache miss " + url);
       try {
-        HttpResponse response = new HttpResponse(new URL(url, "/robots.txt"));
+        HttpResponse response = new HttpResponse(new URL(url, "/robots.txt"),
+                                                 true);
 
         if (response.getCode() == 200)               // found rules: parse them
           robotRules = new RobotRulesParser().parseRules(response.getContent());