You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ignite.apache.org by av...@apache.org on 2021/01/29 12:24:47 UTC
[ignite] branch ignite-ducktape updated: IGNITE-14054 : Improve discovery ducktest: add partial network drop. (#8714)
This is an automated email from the ASF dual-hosted git repository.
av pushed a commit to branch ignite-ducktape
in repository https://gitbox.apache.org/repos/asf/ignite.git
The following commit(s) were added to refs/heads/ignite-ducktape by this push:
new f610f56 IGNITE-14054 : Improve discovery ducktest: add partial network drop. (#8714)
f610f56 is described below
commit f610f56c2db0a41243ed5ac9a8230e21bdb363d4
Author: Vladimir Steshin <vl...@gmail.com>
AuthorDate: Fri Jan 29 15:24:28 2021 +0300
IGNITE-14054 : Improve discovery ducktest: add partial network drop. (#8714)
---
.../ignitetest/services/utils/ignite_aware.py | 35 +++++++++++++++-------
.../services/utils/templates/discovery_macro.j2 | 3 ++
.../tests/ignitetest/tests/discovery_test.py | 30 +++++++++++++------
3 files changed, 49 insertions(+), 19 deletions(-)
diff --git a/modules/ducktests/tests/ignitetest/services/utils/ignite_aware.py b/modules/ducktests/tests/ignitetest/services/utils/ignite_aware.py
index 89f7a4f..d1a3a38 100644
--- a/modules/ducktests/tests/ignitetest/services/utils/ignite_aware.py
+++ b/modules/ducktests/tests/ignitetest/services/utils/ignite_aware.py
@@ -23,6 +23,7 @@ import sys
import time
from abc import abstractmethod, ABCMeta
from datetime import datetime
+from enum import IntEnum
from threading import Thread
from ducktape.utils.util import wait_until
@@ -33,6 +34,7 @@ from ignitetest.services.utils.path import IgnitePathAware
from ignitetest.services.utils.ignite_spec import resolve_spec
from ignitetest.services.utils.jmx_utils import ignite_jmx_mixin
from ignitetest.services.utils.log_utils import monitor_log
+from ignitetest.utils.enum import constructible
# pylint: disable=too-many-public-methods
@@ -40,6 +42,14 @@ class IgniteAwareService(BackgroundThreadService, IgnitePathAware, metaclass=ABC
"""
The base class to build services aware of Ignite.
"""
+ @constructible
+ class NetPart(IntEnum):
+ """
+ Network part to emulate failure.
+ """
+ INPUT = 0
+ OUTPUT = 1
+ ALL = 2
# pylint: disable=R0913
def __init__(self, context, config, num_nodes, startup_timeout_sec, shutdown_timeout_sec, **kwargs):
@@ -296,19 +306,24 @@ class IgniteAwareService(BackgroundThreadService, IgnitePathAware, metaclass=ABC
"""
return os.path.join(self.temp_dir, "iptables.bak")
- def drop_network(self, nodes=None):
+ def drop_network(self, nodes=None, net_part: NetPart = NetPart.ALL):
"""
Disconnects node from cluster.
+ :param nodes: Nodes to emulate network failure on.
+ :param net_part: Part of network to emulate failure of.
"""
if nodes is None:
assert self.num_nodes == 1
nodes = self.nodes
for node in nodes:
- self.logger.info("Dropping ignite connections on '" + node.account.hostname + "' ...")
+ self.logger.info("Dropping " + str(net_part) + " Ignite connections on '" + node.account.hostname + "' ...")
self.__backup_iptables(nodes)
+ return self.exec_on_nodes_async(nodes, lambda n: self.__enable_netfilter(n, net_part))
+
+ def __enable_netfilter(self, node, net_part: NetPart):
cm_spi = self.config.communication_spi
dsc_spi = self.config.discovery_spi
@@ -318,15 +333,15 @@ class IgniteAwareService(BackgroundThreadService, IgnitePathAware, metaclass=ABC
dsc_ports = str(dsc_spi.port) if not hasattr(dsc_spi, 'port_range') or dsc_spi.port_range < 1 else str(
dsc_spi.port) + ':' + str(dsc_spi.port + dsc_spi.port_range)
- cmd = f"sudo iptables -I %s 1 -p tcp -m multiport --dport {dsc_ports},{cm_ports} -j DROP"
+ if net_part in (IgniteAwareService.NetPart.ALL, IgniteAwareService.NetPart.INPUT):
+ node.account.ssh_client.exec_command(
+ f"sudo iptables -I INPUT 1 -p tcp -m multiport --dport {dsc_ports},{cm_ports} -j DROP")
+
+ if net_part in (IgniteAwareService.NetPart.ALL, IgniteAwareService.NetPart.OUTPUT):
+ node.account.ssh_client.exec_command(
+ f"sudo iptables -I OUTPUT 1 -p tcp -m multiport --dport {dsc_ports},{cm_ports} -j DROP")
- return self.exec_on_nodes_async(nodes,
- lambda n: (n.account.ssh_client.exec_command(cmd % "INPUT"),
- n.account.ssh_client.exec_command(cmd % "OUTPUT"),
- self.logger.debug("Activated netfilter on '%s': %s" %
- (n.name, self.__dump_netfilter_settings(n)))
- )
- )
+ self.logger.debug("Activated netfilter on '%s': %s" % (node.name, self.__dump_netfilter_settings(node)))
def __backup_iptables(self, nodes):
# Store current network filter settings.
diff --git a/modules/ducktests/tests/ignitetest/services/utils/templates/discovery_macro.j2 b/modules/ducktests/tests/ignitetest/services/utils/templates/discovery_macro.j2
index acbc53f..9ded542 100644
--- a/modules/ducktests/tests/ignitetest/services/utils/templates/discovery_macro.j2
+++ b/modules/ducktests/tests/ignitetest/services/utils/templates/discovery_macro.j2
@@ -53,6 +53,9 @@
{% endif %}
<property name="localPort" value="{{ spi.port }}"/>
<property name="localPortRange" value="{{ spi.port_range }}"/>
+ {% if spi.connRecoveryTimeout is defined %}
+ <property name="connectionRecoveryTimeout" value="{{ spi.connRecoveryTimeout }}"/>
+ {% endif %}
{{ ip_finder(spi) }}
</bean>
{% endmacro %}
diff --git a/modules/ducktests/tests/ignitetest/tests/discovery_test.py b/modules/ducktests/tests/ignitetest/tests/discovery_test.py
index 07d1d7a..6ce08a0 100644
--- a/modules/ducktests/tests/ignitetest/tests/discovery_test.py
+++ b/modules/ducktests/tests/ignitetest/tests/discovery_test.py
@@ -61,6 +61,8 @@ class DiscoveryTestConfig(NamedTuple):
sequential_failure: bool = False
with_zk: bool = False
failure_detection_timeout: int = 1000
+ disable_conn_recovery: bool = False
+ net_part: IgniteAwareService.NetPart = IgniteService.NetPart.ALL
# pylint: disable=W0223, no-member
@@ -88,29 +90,35 @@ class DiscoveryTest(IgniteTest):
@cluster(num_nodes=MAX_CONTAINERS)
@ignite_versions(str(DEV_BRANCH), str(LATEST))
- @matrix(nodes_to_kill=[1, 2], failure_detection_timeout=[FAILURE_TIMEOUT],
+ @matrix(nodes_to_kill=[1, 2], failure_detection_timeout=[FAILURE_TIMEOUT], disable_conn_recovery=[False, True],
+ net_part=[IgniteService.NetPart.ALL, IgniteService.NetPart.INPUT],
load_type=[ClusterLoad.NONE, ClusterLoad.ATOMIC, ClusterLoad.TRANSACTIONAL])
- def test_nodes_fail_not_sequential_tcp(self, ignite_version, nodes_to_kill, load_type, failure_detection_timeout):
+ def test_nodes_fail_not_sequential_tcp(self, ignite_version, nodes_to_kill, load_type, failure_detection_timeout,
+ disable_conn_recovery: bool, net_part: IgniteService.NetPart):
"""
Test nodes failure scenario with TcpDiscoverySpi not allowing nodes to fail in a row.
"""
test_config = DiscoveryTestConfig(version=IgniteVersion(ignite_version), nodes_to_kill=nodes_to_kill,
load_type=ClusterLoad.construct_from(load_type), sequential_failure=False,
- failure_detection_timeout=failure_detection_timeout)
+ failure_detection_timeout=failure_detection_timeout,
+ disable_conn_recovery=disable_conn_recovery, net_part=net_part)
return self._perform_node_fail_scenario(test_config)
@cluster(num_nodes=MAX_CONTAINERS)
@ignite_versions(str(DEV_BRANCH), str(LATEST))
@matrix(load_type=[ClusterLoad.NONE, ClusterLoad.ATOMIC, ClusterLoad.TRANSACTIONAL],
- failure_detection_timeout=[FAILURE_TIMEOUT])
- def test_2_nodes_fail_sequential_tcp(self, ignite_version, load_type, failure_detection_timeout):
+ net_part=[IgniteService.NetPart.ALL, IgniteService.NetPart.INPUT],
+ failure_detection_timeout=[FAILURE_TIMEOUT], disable_conn_recovery=[False, True])
+ def test_2_nodes_fail_sequential_tcp(self, ignite_version, load_type, failure_detection_timeout,
+ disable_conn_recovery: bool, net_part: IgniteService.NetPart):
"""
Test 2 nodes sequential failure scenario with TcpDiscoverySpi.
"""
test_config = DiscoveryTestConfig(version=IgniteVersion(ignite_version), nodes_to_kill=2,
load_type=ClusterLoad.construct_from(load_type), sequential_failure=True,
- failure_detection_timeout=failure_detection_timeout)
+ failure_detection_timeout=failure_detection_timeout,
+ disable_conn_recovery=disable_conn_recovery, net_part=net_part)
return self._perform_node_fail_scenario(test_config)
@@ -169,6 +177,9 @@ class DiscoveryTest(IgniteTest):
if LATEST_2_7 < test_config.version <= V_2_9_0:
discovery_spi.so_linger = 0
+ if test_config.disable_conn_recovery:
+ discovery_spi.connRecoveryTimeout = 0
+
ignite_config = IgniteConfiguration(
version=test_config.version,
discovery_spi=discovery_spi,
@@ -205,11 +216,12 @@ class DiscoveryTest(IgniteTest):
start_load_app(self.test_context, ignite_config=load_config, params=params, modules=modules)
results.update(self._simulate_and_detect_failure(servers, failed_nodes,
- test_config.failure_detection_timeout * 4))
+ test_config.failure_detection_timeout * 4,
+ test_config.net_part))
return results
- def _simulate_and_detect_failure(self, servers, failed_nodes, timeout):
+ def _simulate_and_detect_failure(self, servers, failed_nodes, timeout, net_part: IgniteAwareService.NetPart):
"""
Perform node failure scenario
"""
@@ -219,7 +231,7 @@ class DiscoveryTest(IgniteTest):
ids_to_wait = [node_id(n) for n in failed_nodes]
- _, first_terminated = servers.drop_network(failed_nodes)
+ _, first_terminated = servers.drop_network(failed_nodes, net_part=net_part)
# Keeps dates of logged node failures.
logged_timestamps = []