You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by jn...@apache.org on 2014/06/11 17:49:45 UTC
svn commit: r1601935 - in /nutch/trunk: CHANGES.txt
src/java/org/apache/nutch/metadata/HttpHeaders.java
src/plugin/protocol-http/src/java/org/apache/nutch/protocol/http/HttpResponse.java
Author: jnioche
Date: Wed Jun 11 15:49:45 2014
New Revision: 1601935
URL: http://svn.apache.org/r1601935
Log:
NUTCH-1736 Can't fetch page if http response header contains Transfer-Encoding:chunked
Modified:
nutch/trunk/CHANGES.txt
nutch/trunk/src/java/org/apache/nutch/metadata/HttpHeaders.java
nutch/trunk/src/plugin/protocol-http/src/java/org/apache/nutch/protocol/http/HttpResponse.java
Modified: nutch/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/nutch/trunk/CHANGES.txt?rev=1601935&r1=1601934&r2=1601935&view=diff
==============================================================================
--- nutch/trunk/CHANGES.txt (original)
+++ nutch/trunk/CHANGES.txt Wed Jun 11 15:49:45 2014
@@ -2,6 +2,8 @@ Nutch Change Log
Nutch Current Development
+* NUTCH-1736 Can't fetch page if http response header contains Transfer-Encoding:chunked (ysc via jnioche)
+
* NUTCH-1782 NodeWalker to return current node (markus)
* NUTCH-1758 IndexChecker to send document to IndexWriters (jnioche)
Modified: nutch/trunk/src/java/org/apache/nutch/metadata/HttpHeaders.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/metadata/HttpHeaders.java?rev=1601935&r1=1601934&r2=1601935&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/metadata/HttpHeaders.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/metadata/HttpHeaders.java Wed Jun 11 15:49:45 2014
@@ -26,6 +26,8 @@ import org.apache.hadoop.io.Text;
*/
public interface HttpHeaders {
+ public final static String TRANSFER_ENCODING = "Transfer-Encoding";
+
public final static String CONTENT_ENCODING = "Content-Encoding";
public final static String CONTENT_LANGUAGE = "Content-Language";
Modified: nutch/trunk/src/plugin/protocol-http/src/java/org/apache/nutch/protocol/http/HttpResponse.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/protocol-http/src/java/org/apache/nutch/protocol/http/HttpResponse.java?rev=1601935&r1=1601934&r2=1601935&view=diff
==============================================================================
--- nutch/trunk/src/plugin/protocol-http/src/java/org/apache/nutch/protocol/http/HttpResponse.java (original)
+++ nutch/trunk/src/plugin/protocol-http/src/java/org/apache/nutch/protocol/http/HttpResponse.java Wed Jun 11 15:49:45 2014
@@ -213,8 +213,13 @@ public class HttpResponse implements Res
parseHeaders(in, line);
haveSeenNonContinueStatus= code != 100; // 100 is "Continue"
}
-
- readPlainContent(in);
+ String transferEncoding = getHeader(Response.TRANSFER_ENCODING);
+ if (transferEncoding != null
+ && "chunked".equalsIgnoreCase(transferEncoding.trim())) {
+ readChunkedContent(in, line);
+ } else {
+ readPlainContent(in);
+ }
String contentEncoding = getHeader(Response.CONTENT_ENCODING);
if ("gzip".equals(contentEncoding) || "x-gzip".equals(contentEncoding)) {
@@ -339,7 +344,7 @@ public class HttpResponse implements Res
break;
}
- if ( (contentBytesRead + chunkLen) > http.getMaxContent() )
+ if ( http.getMaxContent() >= 0 && (contentBytesRead + chunkLen) > http.getMaxContent() )
chunkLen= http.getMaxContent() - contentBytesRead;
// read one chunk