You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by jn...@apache.org on 2014/06/13 13:17:26 UTC
svn commit: r1602375 - in /nutch/trunk: CHANGES.txt
src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/HttpBase.java
Author: jnioche
Date: Fri Jun 13 11:17:26 2014
New Revision: 1602375
URL: http://svn.apache.org/r1602375
Log:
NUTCH-1647 protocol-http throws 'unzipBestEffort returned null' for redirected pages (jnioche)
Modified:
nutch/trunk/CHANGES.txt
nutch/trunk/src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/HttpBase.java
Modified: nutch/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/nutch/trunk/CHANGES.txt?rev=1602375&r1=1602374&r2=1602375&view=diff
==============================================================================
--- nutch/trunk/CHANGES.txt (original)
+++ nutch/trunk/CHANGES.txt Fri Jun 13 11:17:26 2014
@@ -2,6 +2,8 @@ Nutch Change Log
Nutch Current Development
+* NUTCH-1647 protocol-http throws 'unzipBestEffort returned null' for redirected pages (jnioche)
+
* NUTCH-1736 Can't fetch page if http response header contains Transfer-Encoding：chunked (ysc via jnioche)
* NUTCH-1782 NodeWalker to return current node (markus)
Modified: nutch/trunk/src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/HttpBase.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/HttpBase.java?rev=1602375&r1=1602374&r2=1602375&view=diff
==============================================================================
--- nutch/trunk/src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/HttpBase.java (original)
+++ nutch/trunk/src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/HttpBase.java Fri Jun 13 11:17:26 2014
@@ -359,6 +359,11 @@ public abstract class HttpBase implement
if (LOGGER.isTraceEnabled()) { LOGGER.trace("uncompressing...."); }
+ // content can be empty (i.e. redirection) in which case
+ // there is nothing to unzip
+ if (compressed.length == 0)
+ return compressed;
+
byte[] content;
if (getMaxContent() >= 0) {
content = GZIPUtils.unzipBestEffort(compressed, getMaxContent());
@@ -379,6 +384,11 @@ public abstract class HttpBase implement
public byte[] processDeflateEncoded(byte[] compressed, URL url) throws IOException {
+ // content can be empty (i.e. redirection) in which case
+ // there is nothing to deflate
+ if (compressed.length == 0)
+ return compressed;
+
if (LOGGER.isTraceEnabled()) { LOGGER.trace("inflating...."); }
byte[] content = DeflateUtils.inflateBestEffort(compressed, getMaxContent());