You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by sn...@apache.org on 2022/08/19 13:26:13 UTC
[nutch] branch master updated: NUTCH-2930 Protocol-okhttp: implement IP filter (#736)
This is an automated email from the ASF dual-hosted git repository.
snagel pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/nutch.git
The following commit(s) were added to refs/heads/master by this push:
new 7e969eaec NUTCH-2930 Protocol-okhttp: implement IP filter (#736)
7e969eaec is described below
commit 7e969eaec1ab8e9e21667faf6cf1881fb10cfb31
Author: Sebastian Nagel <sn...@apache.org>
AuthorDate: Fri Aug 19 15:26:07 2022 +0200
NUTCH-2930 Protocol-okhttp: implement IP filter (#736)
- add include/exclude rules as list of IP address, CIDR notation
or predefined IP ranges (localhost, loopback, sitelocal)
---
conf/nutch-default.xml | 25 +++
.../org/apache/nutch/protocol/okhttp/CIDR.java | 79 ++++++++
.../nutch/protocol/okhttp/IPFilterRules.java | 129 +++++++++++++
.../org/apache/nutch/protocol/okhttp/OkHttp.java | 35 ++++
.../protocol/okhttp/TestBadServerResponses.java | 2 +-
.../protocol/okhttp/TestIPAddressFiltering.java | 207 +++++++++++++++++++++
.../nutch/protocol/okhttp/TestProtocolOkHttp.java | 2 +-
.../protocol/AbstractHttpProtocolPluginTest.java | 22 ++-
8 files changed, 494 insertions(+), 7 deletions(-)
diff --git a/conf/nutch-default.xml b/conf/nutch-default.xml
index 1ad02a021..2a6325884 100644
--- a/conf/nutch-default.xml
+++ b/conf/nutch-default.xml
@@ -449,6 +449,31 @@
</description>
</property>
+<property>
+ <name>http.filter.ipaddress.include</name>
+ <value></value>
+ <description>
+ If not empty: only fetch content from IP addresses matched by this
+ comma-separated list. Each list item is either a single IP address,
+ an IP address range in CIDR notation, or one of the following
+ pre-defined IP address types: localhost, loopback, sitelocal.
+ The property http.filter.ipaddress.exclude can be used to block
+ subranges of the included ranges.
+ Note: supported only by protocol-okhttp.
+ </description>
+</property>
+
+<property>
+ <name>http.filter.ipaddress.exclude</name>
+ <value></value>
+ <description>
+ If not empty: do not fetch content from IP addresses matched by this
+ comma-separated list. Each list item is either a single IP address,
+ an IP address range in CIDR notation, or one of the following
+ pre-defined IP address types: localhost, loopback, sitelocal.
+ Note: supported only by protocol-okhttp.
+ </description>
+</property>
+
+
<!-- FTP properties -->
<property>
diff --git a/src/plugin/protocol-okhttp/src/java/org/apache/nutch/protocol/okhttp/CIDR.java b/src/plugin/protocol-okhttp/src/java/org/apache/nutch/protocol/okhttp/CIDR.java
new file mode 100644
index 000000000..3add082a8
--- /dev/null
+++ b/src/plugin/protocol-okhttp/src/java/org/apache/nutch/protocol/okhttp/CIDR.java
@@ -0,0 +1,79 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nutch.protocol.okhttp;
+
+import java.net.InetAddress;
+
+import com.google.common.net.InetAddresses;
+
+/**
+ * Parse a <a href=
+ * "https://en.wikipedia.org/wiki/Classless_Inter-Domain_Routing">CIDR</a> block
+ * notation and test whether an IP address is contained in the subnet range
+ * defined by the CIDR.
+ *
+ * <p>
+ * IPv4 and IPv6 ranges are kept apart: an IPv4 address is never contained in
+ * an IPv6 range and vice versa.
+ * </p>
+ */
+public class CIDR {
+  /** Base address of the range. */
+  InetAddress addr;
+  /** Prefix length: number of leading bits of {@link #addr} which must match. */
+  int mask;
+
+  /**
+   * Create a CIDR range from an already parsed address.
+   *
+   * @param address
+   *          base address of the range
+   * @param mask
+   *          prefix length in bits (not validated by this constructor)
+   */
+  public CIDR(InetAddress address, int mask) {
+    this.addr = address;
+    this.mask = mask;
+  }
+
+  /**
+   * Parse a CIDR range from its string notation, e.g.
+   * <code>192.168.0.0/16</code> or <code>fd00::/8</code>. A plain IP address
+   * without prefix length is a range containing only this single address.
+   *
+   * @param cidr
+   *          IP address, optionally followed by a slash and the prefix length
+   * @throws IllegalArgumentException
+   *           if the IP address is not well-formed, or the prefix length is
+   *           not a number or is out of range for the address family
+   */
+  public CIDR(String cidr) throws IllegalArgumentException {
+    String ipStr = cidr;
+    int sep = cidr.indexOf('/');
+    if (sep > -1) {
+      ipStr = cidr.substring(0, sep);
+    }
+    addr = InetAddresses.forString(ipStr);
+    int addrBits = addr.getAddress().length * 8;
+    if (sep == -1) {
+      // single address: every bit of the (possibly converted) address is
+      // significant, no shifting required
+      mask = addrBits;
+      return;
+    }
+    // IPv4-mapped IPv6 addresses are automatically converted to IPv4, but the
+    // prefix length given by the user still refers to the 128-bit notation
+    boolean ipv4Mapped = cidr.indexOf(':') > -1 && addrBits == 32;
+    int maxBits = ipv4Mapped ? 128 : addrBits;
+    int prefix = Integer.parseInt(cidr.substring(sep + 1));
+    if (prefix < 0 || prefix > maxBits) {
+      throw new IllegalArgumentException("Prefix length " + prefix
+          + " out of range (0-" + maxBits + ") in CIDR " + cidr);
+    }
+    // need to shift the mask by the 96 bits of the IPv4-mapped prefix
+    mask = ipv4Mapped ? Math.max(0, prefix - 96) : prefix;
+  }
+
+  /**
+   * Test whether an IP address is contained in this range.
+   *
+   * @param address
+   *          IP address to test
+   * @return true if the first {@link #mask} bits of the address equal those of
+   *         the base address, false otherwise or if the address families
+   *         (IPv4 vs. IPv6) differ
+   */
+  public boolean contains(InetAddress address) {
+    byte[] addr0 = addr.getAddress();
+    byte[] addr1 = address.getAddress();
+    if (addr0.length != addr1.length) {
+      // not comparing IPv4 and IPv6 addresses
+      return false;
+    }
+    for (int i = 0; i < addr0.length; i++) {
+      int remainingMaskBits = mask - (i * 8);
+      if (remainingMaskBits <= 0) {
+        return true;
+      }
+      // Never shift by more than 8 bits: Java only uses the low 5 bits of an
+      // int shift distance, a shift by 32, 64, ... would be a shift by 0 and
+      // the byte under the cursor would (almost) not be compared at all.
+      int bits = Math.min(8, remainingMaskBits);
+      int m = (0xff << (8 - bits)) & 0xff; // mask for byte under cursor
+      if ((addr0[i] & m) != (addr1[i] & m)) {
+        return false;
+      }
+    }
+    return true;
+  }
+
+  @Override
+  public String toString() {
+    return addr + "/" + mask;
+  }
+}
diff --git a/src/plugin/protocol-okhttp/src/java/org/apache/nutch/protocol/okhttp/IPFilterRules.java b/src/plugin/protocol-okhttp/src/java/org/apache/nutch/protocol/okhttp/IPFilterRules.java
new file mode 100644
index 000000000..868732fe5
--- /dev/null
+++ b/src/plugin/protocol-okhttp/src/java/org/apache/nutch/protocol/okhttp/IPFilterRules.java
@@ -0,0 +1,129 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nutch.protocol.okhttp;
+
+import java.lang.invoke.MethodHandles;
+import java.net.InetAddress;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.function.Predicate;
+
+import org.apache.hadoop.conf.Configuration;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Optionally limit or block connections to IP address ranges
+ * (localhost/loopback or site-local addresses, subnet ranges given in CIDR
+ * notation, or single IP addresses).
+ *
+ * IP filter rules are built from two Nutch properties:
+ * <ul>
+ * <li><code>http.filter.ipaddress.include</code> defines all allowed IP ranges.
+ * If not defined or empty, all IP addresses which are not explicitly excluded
+ * are allowed.</li>
+ * <li><code>http.filter.ipaddress.exclude</code> defines excluded IP address
+ * ranges. Exclude rules take precedence over include rules.</li>
+ * </ul>
+ *
+ * IP ranges can be defined as
+ * <ul>
+ * <li>IP address, e.g. <code>127.0.0.1</code> or <code>::1</code> (IPv6)</li>
+ * <li>CIDR notation, e.g. <code>192.168.0.0/16</code> or
+ * <code>fd00::/8</code></li>
+ * <li><code>localhost</code> or <code>loopback</code> applies to all IP
+ * addresses for which {@link InetAddress#isLoopbackAddress()} is true</li>
+ * <li><code>sitelocal</code> applies to all IP
+ * addresses for which {@link InetAddress#isSiteLocalAddress()} is true</li>
+ * </ul>
+ *
+ * Multiple IP ranges are separated by a comma, e.g.
+ * <code>loopback,sitelocal,fd00::/8</code>. Whitespace around list items is
+ * ignored and the predefined names are matched case-insensitively.
+ *
+ */
+public class IPFilterRules {
+
+  protected static final Logger LOG = LoggerFactory
+      .getLogger(MethodHandles.lookup().lookupClass());
+
+  /** Rules built from <code>http.filter.ipaddress.include</code>. */
+  List<Predicate<InetAddress>> includeRules;
+  /** Rules built from <code>http.filter.ipaddress.exclude</code>. */
+  List<Predicate<InetAddress>> excludeRules;
+
+  /**
+   * Build the include and exclude rules from the configuration.
+   *
+   * @param conf
+   *          configuration holding the two IP filter properties
+   */
+  public IPFilterRules(Configuration conf) {
+    includeRules = parseIPRules(conf, "http.filter.ipaddress.include");
+    excludeRules = parseIPRules(conf, "http.filter.ipaddress.exclude");
+  }
+
+  /**
+   * @return true if neither include nor exclude rules are defined, i.e. every
+   *         address is accepted
+   */
+  public boolean isEmpty() {
+    return includeRules.isEmpty() && excludeRules.isEmpty();
+  }
+
+  /**
+   * Apply the filter rules to an IP address.
+   *
+   * @param address
+   *          IP address to test
+   * @return true if the address is matched by at least one include rule (or
+   *         no include rules are defined) and is not matched by any exclude
+   *         rule
+   */
+  public boolean accept(InetAddress address) {
+    if (!includeRules.isEmpty() && !matchesAny(includeRules, address)) {
+      return false;
+    }
+    return !matchesAny(excludeRules, address);
+  }
+
+  /** Test whether at least one of the rules matches the address. */
+  private static boolean matchesAny(List<Predicate<InetAddress>> rules,
+      InetAddress address) {
+    for (Predicate<InetAddress> rule : rules) {
+      if (rule.test(address)) {
+        return true;
+      }
+    }
+    return false;
+  }
+
+  /**
+   * Parse the comma-separated list of IP ranges stored in a property. List
+   * items which cannot be parsed are logged and skipped.
+   *
+   * @param conf
+   *          configuration to read from
+   * @param ipRuleProperty
+   *          name of the property holding the list of IP ranges
+   * @return list of rules, empty if the property is not set or empty
+   */
+  private static List<Predicate<InetAddress>> parseIPRules(Configuration conf,
+      String ipRuleProperty) {
+    List<Predicate<InetAddress>> rules = new ArrayList<>();
+    String[] ipRules = conf.getStrings(ipRuleProperty);
+    if (ipRules == null) {
+      return rules;
+    }
+    for (String ipRule : ipRules) {
+      // tolerate whitespace around the comma separators
+      String rule = ipRule.trim();
+      if (rule.isEmpty()) {
+        continue;
+      }
+      // Locale.ROOT: lowercasing must not depend on the default locale,
+      // otherwise e.g. "SITELOCAL" is not recognized with a Turkish locale
+      switch (rule.toLowerCase(java.util.Locale.ROOT)) {
+      case "localhost":
+      case "loopback":
+        rules.add(InetAddress::isLoopbackAddress);
+        break;
+      case "sitelocal":
+        rules.add(InetAddress::isSiteLocalAddress);
+        break;
+      default:
+        try {
+          CIDR cidr = new CIDR(rule);
+          rules.add(cidr::contains);
+        } catch (IllegalArgumentException e) {
+          LOG.error(
+              "Failed to parse {} as CIDR, ignoring to configure IP rules ({})",
+              rule, ipRuleProperty);
+        }
+      }
+    }
+    if (!rules.isEmpty()) {
+      LOG.info("Found {} IP filter rules for {}", rules.size(), ipRuleProperty);
+    }
+    return rules;
+  }
+
+}
diff --git a/src/plugin/protocol-okhttp/src/java/org/apache/nutch/protocol/okhttp/OkHttp.java b/src/plugin/protocol-okhttp/src/java/org/apache/nutch/protocol/okhttp/OkHttp.java
index 63fa32837..876c4ef24 100644
--- a/src/plugin/protocol-okhttp/src/java/org/apache/nutch/protocol/okhttp/OkHttp.java
+++ b/src/plugin/protocol-okhttp/src/java/org/apache/nutch/protocol/okhttp/OkHttp.java
@@ -212,6 +212,11 @@ public class OkHttp extends HttpBase {
}
}
+ IPFilterRules ipFilterRules = new IPFilterRules(conf);
+ if (!ipFilterRules.isEmpty()) {
+ builder.addNetworkInterceptor(new HTTPFilterIPAddressInterceptor(ipFilterRules));
+ }
+
if (this.storeIPAddress || this.storeHttpHeaders || this.storeHttpRequest) {
builder.addNetworkInterceptor(new HTTPHeadersInterceptor());
}
@@ -259,6 +264,36 @@ public class OkHttp extends HttpBase {
}
}
+ class HTTPFilterIPAddressInterceptor implements Interceptor {
+
+   /** IP filter rules every connection is checked against. */
+   IPFilterRules rules;
+
+   public HTTPFilterIPAddressInterceptor(IPFilterRules rules) {
+     this.rules = rules;
+   }
+
+   /**
+    * Let the request pass only if the address of the connection's socket is
+    * accepted by the IP filter rules, otherwise fail with an
+    * {@link IOException}.
+    */
+   @Override
+   public okhttp3.Response intercept(Interceptor.Chain chain)
+       throws IOException {
+
+     // NOTE(review): this is the address the socket is connected to -
+     // presumably the proxy's address if a proxy is used; confirm
+     InetAddress remote = chain.connection().socket().getInetAddress();
+
+     if (rules.accept(remote)) {
+       return chain.proceed(chain.request());
+     }
+
+     String blockedIp = remote.getHostAddress();
+     LOG.warn("Blocked connection to IP address {}: {}", blockedIp,
+         chain.request().url());
+     throw new IOException("Forbidden connection to IP address " + blockedIp);
+   }
+ }
+
class HTTPHeadersInterceptor implements Interceptor {
private String getNormalizedProtocolName(Protocol protocol) {
diff --git a/src/plugin/protocol-okhttp/src/test/org/apache/nutch/protocol/okhttp/TestBadServerResponses.java b/src/plugin/protocol-okhttp/src/test/org/apache/nutch/protocol/okhttp/TestBadServerResponses.java
index 5a587fea2..7c5d0f15c 100644
--- a/src/plugin/protocol-okhttp/src/test/org/apache/nutch/protocol/okhttp/TestBadServerResponses.java
+++ b/src/plugin/protocol-okhttp/src/test/org/apache/nutch/protocol/okhttp/TestBadServerResponses.java
@@ -34,7 +34,7 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
- * Test cases for protocol-http - robustness regarding bad server responses:
+ * Test cases for protocol-okhttp - robustness regarding bad server responses:
* malformed HTTP header lines, etc. See, NUTCH-2549.
*/
public class TestBadServerResponses extends AbstractHttpProtocolPluginTest {
diff --git a/src/plugin/protocol-okhttp/src/test/org/apache/nutch/protocol/okhttp/TestIPAddressFiltering.java b/src/plugin/protocol-okhttp/src/test/org/apache/nutch/protocol/okhttp/TestIPAddressFiltering.java
new file mode 100644
index 000000000..dbd1b846d
--- /dev/null
+++ b/src/plugin/protocol-okhttp/src/test/org/apache/nutch/protocol/okhttp/TestIPAddressFiltering.java
@@ -0,0 +1,207 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nutch.protocol.okhttp;
+
+import static java.nio.charset.StandardCharsets.UTF_8;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+
+import java.net.InetAddress;
+import java.util.function.Function;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.nutch.protocol.AbstractHttpProtocolPluginTest;
+import org.junit.Test;
+
+import com.google.common.net.InetAddresses;
+
+/**
+ * Test cases for protocol-okhttp IP address filtering
+ */
+public class TestIPAddressFiltering extends AbstractHttpProtocolPluginTest {
+
+  @Override
+  protected String getPluginClassName() {
+    return "org.apache.nutch.protocol.okhttp.OkHttp";
+  }
+
+  /**
+   * Parse an IP address string without performing any DNS lookup.
+   *
+   * @throws IllegalArgumentException
+   *           if the IP address is not well-formed
+   */
+  public InetAddress parseIP(String ip) {
+    // the Java built-in may perform DNS lookup (and throw UnknownHostException)
+    // if not a well-formed IP address:
+    // InetAddress.getByName(ip);
+
+    // use Guava because it does not perform DNS lookups, may throw
+    // IllegalArgumentException if IP address is not well-formed
+    return InetAddresses.forString(ip);
+  }
+
+  /** Assert that the IP address is contained in the CIDR range. */
+  public void testCIDRcontains(String cidr, String ip) {
+    CIDR c = new CIDR(cidr);
+    InetAddress i = parseIP(ip);
+    assertTrue(i + " should be in " + c, c.contains(i));
+  }
+
+  /** Assert that the IP address is not contained in the CIDR range. */
+  public void testCIDRnotContains(String cidr, String ip) {
+    CIDR c = new CIDR(cidr);
+    InetAddress i = parseIP(ip);
+    assertFalse(i + " should not be in " + c, c.contains(i));
+  }
+
+  /** Tests for {@link CIDR} */
+  @Test
+  public void testCIDRs() {
+    // private subnets IPv4
+    testCIDRcontains("127.0.0.0/8", "127.0.0.1");
+    testCIDRcontains("10.0.0.0/8", "10.0.0.13");
+    testCIDRcontains("172.16.0.0/12", "172.17.0.0");
+    testCIDRcontains("192.168.0.0/16", "192.168.0.1");
+
+    // private subnets IPv6
+    testCIDRcontains("::1/128", "::1");
+    testCIDRcontains("127.0.0.0/8", "::ffff:127.0.0.1");
+    testCIDRcontains("::ffff:7f00:0/104", "::ffff:127.0.0.1");
+    testCIDRcontains("fd00::/8", "fd12:3456:789a:1::1");
+    testCIDRcontains("fe80::/10", "fe80::2f29:b6f0:a4c:32ae");
+
+    // test single IP address (with and without mask)
+    testCIDRcontains("127.0.0.1", "127.0.0.1");
+    testCIDRcontains("127.0.0.1/24", "127.0.0.1");
+
+    // test off-by-one boundaries
+    testCIDRnotContains("127.0.0.0/8", "128.0.0.0");
+    testCIDRnotContains("10.0.0.0/8", "11.0.0.0");
+    testCIDRnotContains("10.0.0.0/8", "9.255.255.255");
+    testCIDRnotContains("172.16.0.0/12", "172.32.0.0");
+    testCIDRnotContains("172.16.0.0/12", "171.255.255.255");
+  }
+
+  /**
+   * Build the IP filter rules from the configuration and assert that all
+   * addresses in <code>included</code> are accepted and all addresses in
+   * <code>excluded</code> are rejected.
+   */
+  public void testFilter(Configuration conf, String[] included, String[] excluded) {
+    IPFilterRules ipFilterRules = new IPFilterRules(conf);
+    for (String address : included) {
+      assertTrue("Address " + address + " should be included",
+          ipFilterRules.accept(parseIP(address)));
+    }
+    for (String address : excluded) {
+      assertFalse("Address " + address + " should be excluded",
+          ipFilterRules.accept(parseIP(address)));
+    }
+  }
+
+  /** Tests for {@link IPFilterRules} */
+  @Test
+  public void testIPAddressFilterRules() {
+    String[] publicAddresses = {"93.184.216.34", "93.184.216.43"};
+    String[] loopbackAddresses = {"127.0.0.1", "127.0.0.2", "::1"};
+    String[] sitelocalAddresses = {"10.0.0.13", "172.17.0.0", "192.168.0.1"};
+
+    conf.set("http.filter.ipaddress.include", "");
+    conf.set("http.filter.ipaddress.exclude", "localhost");
+    testFilter(conf, new String[0], loopbackAddresses);
+
+    conf.set("http.filter.ipaddress.exclude", "loopback,sitelocal");
+    testFilter(conf, publicAddresses, loopbackAddresses);
+    testFilter(conf, publicAddresses, sitelocalAddresses);
+
+    conf.set("http.filter.ipaddress.include", "93.184.216.0/8");
+    conf.set("http.filter.ipaddress.exclude", "");
+    testFilter(conf, publicAddresses, loopbackAddresses);
+
+    conf.set("http.filter.ipaddress.include", "localhost");
+    conf.set("http.filter.ipaddress.exclude", "");
+    testFilter(conf, loopbackAddresses, publicAddresses);
+  }
+
+  /**
+   * Assert that the IP address is matched by the {@link InetAddress}
+   * predicate behind the predefined address type (localhost, loopback or
+   * sitelocal).
+   */
+  public void testPredefinedAddressRange(String ipAddress, String type) {
+    try {
+      InetAddress addr = InetAddresses.forString(ipAddress);
+      Function<InetAddress,Boolean> pred = null;
+      switch (type.toLowerCase()) {
+      case "localhost":
+      case "loopback":
+        pred = InetAddress::isLoopbackAddress;
+        break;
+      case "sitelocal":
+        pred = InetAddress::isSiteLocalAddress;
+        break;
+      default:
+        fail("Unknown IP address type " + type);
+      }
+      assertTrue(ipAddress + " is not recognized as " + type + " address", pred.apply(addr));
+    } catch (IllegalArgumentException e) {
+      fail("Not a valid IP address string: " + ipAddress);
+    }
+  }
+
+  /**
+   * Verify that certain IP addresses are matched by predefined IP classes:
+   * localhost, loopback, sitelocal. This verifies that the predefined classes
+   * are properly mapped to the underlying predicates of the class
+   * {@link InetAddress}.
+   */
+  @Test
+  public void testPredefinedRanges() throws Exception {
+    testPredefinedAddressRange("127.0.0.1", "localhost");
+    testPredefinedAddressRange("127.0.0.1", "loopback");
+    testPredefinedAddressRange("10.0.0.13", "sitelocal");
+    testPredefinedAddressRange("172.17.0.0", "sitelocal");
+    testPredefinedAddressRange("192.168.0.1", "sitelocal");
+
+    testPredefinedAddressRange("::1", "loopback");
+    testPredefinedAddressRange("::ffff:127.0.0.1", "loopback");
+    // fec0::/10 - Java follows the "old" standard to define private IPv6 addresses
+    testPredefinedAddressRange("fec0::", "sitelocal");
+    // fd::/8 - not (yet?) recognized as site-local address by InetAddress::isSiteLocalAddress
+    //testPredefinedAddressRange("fd12:3456:789a:1::1", "sitelocal");
+  }
+
+  /**
+   * Test whether connections are blocked according to the IP filter
+   * configuration
+   */
+  @Test
+  public void testConnectionBlocking() throws Exception {
+    localHost = "127.0.0.1";
+    launchServer("/", (responseHeader + simpleContent).getBytes(UTF_8));
+
+    // without filter configured
+    conf.set("http.filter.ipaddress.exclude", "");
+    http.setConf(conf);
+    fetchPage("/", 200, "text/html");
+
+    // filter localhost
+    conf.set("http.filter.ipaddress.exclude", "localhost");
+    http.setConf(conf);
+    fetchPage("/", -1, "text/html");
+
+    // filter loopback (was a copy of the "localhost" case, leaving the
+    // "loopback" keyword untested at connection level)
+    conf.set("http.filter.ipaddress.exclude", "loopback");
+    http.setConf(conf);
+    fetchPage("/", -1, "text/html");
+
+    // filter by IP
+    conf.set("http.filter.ipaddress.exclude", "127.0.0.1");
+    http.setConf(conf);
+    fetchPage("/", -1, "text/html");
+
+    // filter by CIDR
+    conf.set("http.filter.ipaddress.exclude", "127.0.0.0/8");
+    http.setConf(conf);
+    fetchPage("/", -1, "text/html");
+  }
+
+}
diff --git a/src/plugin/protocol-okhttp/src/test/org/apache/nutch/protocol/okhttp/TestProtocolOkHttp.java b/src/plugin/protocol-okhttp/src/test/org/apache/nutch/protocol/okhttp/TestProtocolOkHttp.java
index 289e75672..e740ed288 100644
--- a/src/plugin/protocol-okhttp/src/test/org/apache/nutch/protocol/okhttp/TestProtocolOkHttp.java
+++ b/src/plugin/protocol-okhttp/src/test/org/apache/nutch/protocol/okhttp/TestProtocolOkHttp.java
@@ -25,7 +25,7 @@ import org.apache.nutch.protocol.AbstractHttpProtocolPluginTest;
import org.junit.Test;
/**
- * Test cases for protocol-http
+ * Test cases for protocol-okhttp
*/
public class TestProtocolOkHttp extends AbstractHttpProtocolPluginTest {
diff --git a/src/test/org/apache/nutch/protocol/AbstractHttpProtocolPluginTest.java b/src/test/org/apache/nutch/protocol/AbstractHttpProtocolPluginTest.java
index 3a90e21a9..322b34e99 100644
--- a/src/test/org/apache/nutch/protocol/AbstractHttpProtocolPluginTest.java
+++ b/src/test/org/apache/nutch/protocol/AbstractHttpProtocolPluginTest.java
@@ -28,7 +28,6 @@ import java.net.Socket;
import java.net.SocketException;
import java.net.URL;
import java.util.ArrayList;
-import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
@@ -60,6 +59,17 @@ public abstract class AbstractHttpProtocolPluginTest {
protected Protocol http;
protected ServerSocket server;
protected Configuration conf;
+
+ /** Protocol / URL scheme used to send/receive test requests */
+ protected String protocol = "http";
+
+ /**
+ * URL host name used to represent localhost when sending/receiving test
+ * requests
+ */
+ protected String localHost = "127.0.0.1";
+
+ /** Port used to send/receive test requests */
protected int defaultPort = 47505;
protected static final String responseHeader = "HTTP/1.1 200 OK\r\n";
@@ -103,7 +113,9 @@ public abstract class AbstractHttpProtocolPluginTest {
@After
public void tearDown() throws Exception {
- server.close();
+ if (server != null) {
+ server.close();
+ }
}
/**
@@ -123,13 +135,13 @@ public abstract class AbstractHttpProtocolPluginTest {
BiFunction<String, String[], byte[]> responder,
Predicate<List<String>> requestChecker) throws Exception {
server = new ServerSocket();
- server.bind(new InetSocketAddress("127.0.0.1", port));
+ server.bind(new InetSocketAddress(localHost, port));
Pattern requestPattern = Pattern.compile("(?i)^GET\\s+(\\S+)");
while (true) {
LOG.info("Listening on port {}", port);
if (server.isClosed()) {
server = new ServerSocket();
- server.bind(new InetSocketAddress("127.0.0.1", port));
+ server.bind(new InetSocketAddress(localHost, port));
}
Socket socket = server.accept();
LOG.info("Connection received");
@@ -259,7 +271,7 @@ public abstract class AbstractHttpProtocolPluginTest {
*/
protected ProtocolOutput fetchPage(int port, String page, int expectedCode,
String expectedContentType) throws Exception {
- URL url = new URL("http", "127.0.0.1", port, page);
+ URL url = new URL(protocol, localHost, port, page);
LOG.info("Fetching {}", url);
CrawlDatum crawlDatum = new CrawlDatum();
ProtocolOutput protocolOutput = http