You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by sn...@apache.org on 2015/02/12 12:28:51 UTC

svn commit: r1659227 - in /nutch/trunk: CHANGES.txt src/java/org/apache/nutch/fetcher/Fetcher.java

Author: snagel
Date: Thu Feb 12 11:28:50 2015
New Revision: 1659227

URL: http://svn.apache.org/r1659227
Log:
NUTCH-1939 Fetcher fails to follow redirects

Modified:
    nutch/trunk/CHANGES.txt
    nutch/trunk/src/java/org/apache/nutch/fetcher/Fetcher.java

Modified: nutch/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/nutch/trunk/CHANGES.txt?rev=1659227&r1=1659226&r2=1659227&view=diff
==============================================================================
--- nutch/trunk/CHANGES.txt (original)
+++ nutch/trunk/CHANGES.txt Thu Feb 12 11:28:50 2015
@@ -2,6 +2,8 @@ Nutch Change Log
 
 Nutch Current Development 1.10-SNAPSHOT
 
+* NUTCH-1939 Fetcher fails to follow redirects (Leo Ye via snagel)
+
 * NUTCH-1913 LinkDB to implement db.ignore.external.links (markus, snagel)
 
 * NUTCH-1925 Upgrade to Apache Tika 1.7 (Tyler Palsulich via markus)

Modified: nutch/trunk/src/java/org/apache/nutch/fetcher/Fetcher.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/fetcher/Fetcher.java?rev=1659227&r1=1659226&r2=1659227&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/fetcher/Fetcher.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/fetcher/Fetcher.java Thu Feb 12 11:28:50 2015
@@ -786,7 +786,7 @@ public class Fetcher extends Configured
                       newUrl, refreshTime < Fetcher.PERM_REFRESH_TIME,
                       Fetcher.CONTENT_REDIR);
                   if (redirUrl != null) {
-                    queueRedirect(redirUrl, fit);
+                    fit = queueRedirect(redirUrl, fit);
                   }
                 }
                 break;
@@ -807,7 +807,7 @@ public class Fetcher extends Configured
                 Text redirUrl = handleRedirect(fit.url, fit.datum, urlString,
                     newUrl, temp, Fetcher.PROTOCOL_REDIR);
                 if (redirUrl != null) {
-                  queueRedirect(redirUrl, fit);
+                  fit = queueRedirect(redirUrl, fit);
                 } else {
                   // stop redirecting
                   redirecting = false;
@@ -946,7 +946,7 @@ public class Fetcher extends Configured
       }
     }
 
-    private void queueRedirect(Text redirUrl, FetchItem fit)
+    private FetchItem queueRedirect(Text redirUrl, FetchItem fit)
         throws ScoringFilterException {
       CrawlDatum newDatum = new CrawlDatum(CrawlDatum.STATUS_DB_UNFETCHED,
           fit.datum.getFetchInterval(), fit.datum.getScore());
@@ -967,6 +967,7 @@ public class Fetcher extends Configured
         reporter.incrCounter("FetcherStatus", "FetchItem.notCreated.redirect",
             1);
       }
+      return fit;
     }
 
     private void logError(Text url, String message) {