You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by ta...@apache.org on 2023/09/13 16:55:56 UTC
[nutch] branch master updated: NUTCH-3001 - fix logic for grabbing bytes if there's no content type in the header
This is an automated email from the ASF dual-hosted git repository.
tallison pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/nutch.git
The following commit(s) were added to refs/heads/master by this push:
new b6f645a4d NUTCH-3001 - fix logic for grabbing bytes if there's no content type in the header
new f078a88df Merge pull request #774 from tballison/NUTCH-3001
b6f645a4d is described below
commit b6f645a4d025fa136f557dd37e9aba611b425fbb
Author: tallison <ta...@apache.org>
AuthorDate: Wed Sep 13 10:37:17 2023 -0400
NUTCH-3001 - fix logic for grabbing bytes if there's no content type in the header
---
.../nutch/protocol/selenium/HttpResponse.java | 78 ++++++++++------------
1 file changed, 37 insertions(+), 41 deletions(-)
diff --git a/src/plugin/protocol-selenium/src/java/org/apache/nutch/protocol/selenium/HttpResponse.java b/src/plugin/protocol-selenium/src/java/org/apache/nutch/protocol/selenium/HttpResponse.java
index bb3bf6357..750677374 100644
--- a/src/plugin/protocol-selenium/src/java/org/apache/nutch/protocol/selenium/HttpResponse.java
+++ b/src/plugin/protocol-selenium/src/java/org/apache/nutch/protocol/selenium/HttpResponse.java
@@ -269,55 +269,51 @@ public class HttpResponse implements Response {
String contentType = getHeader(Response.CONTENT_TYPE);
// handle with Selenium only if content type is HTML or XHTML
- if (contentType != null) {
- if (contentType.contains("text/html")
- || contentType.contains("application/xhtml")) {
- readPlainContent(url);
- } else {
- try {
- int contentLength = Integer.MAX_VALUE;
- String contentLengthString = headers.get(Response.CONTENT_LENGTH);
- if (contentLengthString != null) {
- try {
- contentLength = Integer.parseInt(contentLengthString.trim());
- } catch (NumberFormatException ex) {
- throw new HttpException(
- "bad content length: " + contentLengthString);
- }
+ if (contentType != null &&
+ (contentType.contains("text/html") || contentType.contains("application/xhtml"))) {
+ readPlainContent(url);
+ } else {
+ try {
+ int contentLength = Integer.MAX_VALUE;
+ String contentLengthString = headers.get(Response.CONTENT_LENGTH);
+ if (contentLengthString != null) {
+ try {
+ contentLength = Integer.parseInt(contentLengthString.trim());
+ } catch (NumberFormatException ex) {
+ throw new HttpException("bad content length: " + contentLengthString);
}
+ }
- if (http.getMaxContent() >= 0
- && contentLength > http.getMaxContent()) {
- contentLength = http.getMaxContent();
- }
+ if (http.getMaxContent() >= 0 && contentLength > http.getMaxContent()) {
+ contentLength = http.getMaxContent();
+ }
- byte[] buffer = new byte[HttpBase.BUFFER_SIZE];
- int bufferFilled = 0;
- int totalRead = 0;
- ByteArrayOutputStream out = new ByteArrayOutputStream();
- while ((bufferFilled = in.read(buffer, 0, buffer.length)) != -1
- && totalRead + bufferFilled <= contentLength) {
- totalRead += bufferFilled;
- out.write(buffer, 0, bufferFilled);
- }
+ byte[] buffer = new byte[HttpBase.BUFFER_SIZE];
+ int bufferFilled = 0;
+ int totalRead = 0;
+ ByteArrayOutputStream out = new ByteArrayOutputStream();
+ while ((bufferFilled = in.read(buffer, 0, buffer.length)) != -1 &&
+ totalRead + bufferFilled <= contentLength) {
+ totalRead += bufferFilled;
+ out.write(buffer, 0, bufferFilled);
+ }
- content = out.toByteArray();
+ content = out.toByteArray();
- } catch (Exception e) {
- if (code == 200)
- throw new IOException(e.toString());
- // for codes other than 200 OK, we are fine with empty content
- } finally {
- if (in != null) {
- in.close();
- }
+ } catch (Exception e) {
+ if (code == 200) {
+ throw new IOException(e.toString());
+ }
+ // for codes other than 200 OK, we are fine with empty content
+ } finally {
+ if (in != null) {
+ in.close();
}
- }
- if (httpHeaders != null) {
- headers.add(Response.RESPONSE_HEADERS, httpHeaders.toString());
}
}
-
+ if (httpHeaders != null) {
+ headers.add(Response.RESPONSE_HEADERS, httpHeaders.toString());
+ }
} catch(KeyManagementException | NoSuchAlgorithmException | KeyStoreException e) {
throw new ProtocolException(e);
} finally {