You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by jn...@apache.org on 2014/06/13 13:17:26 UTC

svn commit: r1602375 - in /nutch/trunk: CHANGES.txt src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/HttpBase.java

Author: jnioche
Date: Fri Jun 13 11:17:26 2014
New Revision: 1602375

URL: http://svn.apache.org/r1602375
Log:
NUTCH-1647 protocol-http throws 'unzipBestEffort returned null' for redirected pages (jnioche)

Modified:
    nutch/trunk/CHANGES.txt
    nutch/trunk/src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/HttpBase.java

Modified: nutch/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/nutch/trunk/CHANGES.txt?rev=1602375&r1=1602374&r2=1602375&view=diff
==============================================================================
--- nutch/trunk/CHANGES.txt (original)
+++ nutch/trunk/CHANGES.txt Fri Jun 13 11:17:26 2014
@@ -2,6 +2,8 @@ Nutch Change Log
 
 Nutch Current Development
 
+* NUTCH-1647 protocol-http throws 'unzipBestEffort returned null' for redirected pages (jnioche)
+
 * NUTCH-1736 Can't fetch page if http response header contains Transfer-Encoding:chunked (ysc via jnioche)
 
 * NUTCH-1782 NodeWalker to return current node (markus)

Modified: nutch/trunk/src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/HttpBase.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/HttpBase.java?rev=1602375&r1=1602374&r2=1602375&view=diff
==============================================================================
--- nutch/trunk/src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/HttpBase.java (original)
+++ nutch/trunk/src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/HttpBase.java Fri Jun 13 11:17:26 2014
@@ -359,6 +359,11 @@ public abstract class HttpBase implement
 
     if (LOGGER.isTraceEnabled()) { LOGGER.trace("uncompressing...."); }
 
+    // content can be empty (e.g. for a redirection), in which case
+    // there is nothing to unzip
+    if (compressed.length == 0)
+      return compressed;
+    
     byte[] content;
     if (getMaxContent() >= 0) {
         content = GZIPUtils.unzipBestEffort(compressed, getMaxContent());
@@ -379,6 +384,11 @@ public abstract class HttpBase implement
 
   public byte[] processDeflateEncoded(byte[] compressed, URL url) throws IOException {
 
+    // content can be empty (e.g. for a redirection), in which case
+    // there is nothing to inflate
+    if (compressed.length == 0)
+      return compressed;
+    
     if (LOGGER.isTraceEnabled()) { LOGGER.trace("inflating...."); }
 
     byte[] content = DeflateUtils.inflateBestEffort(compressed, getMaxContent());