You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by sh...@apache.org on 2019/04/03 16:49:50 UTC
[hadoop] branch trunk updated: HDDS-1164. Add New blockade Tests to
test Replica Manager. Contributed by Nilotpal Nandi.
This is an automated email from the ASF dual-hosted git repository.
shashikant pushed a commit to branch trunk
in repository https://gitbox.apache.org/repos/asf/hadoop.git
The following commit(s) were added to refs/heads/trunk by this push:
new 7cd7045 HDDS-1164. Add New blockade Tests to test Replica Manager. Contributed by Nilotpal Nandi.
7cd7045 is described below
commit 7cd7045eea24ee0001f9069be370a028c64e36a6
Author: Shashikant Banerjee <sh...@apache.org>
AuthorDate: Wed Apr 3 22:19:10 2019 +0530
HDDS-1164. Add New blockade Tests to test Replica Manager. Contributed by Nilotpal Nandi.
---
.../src/main/blockade/blockadeUtils/blockade.py | 5 +-
.../main/blockade/clusterUtils/cluster_utils.py | 23 +-
.../main/blockade/test_blockade_client_failure.py | 9 +-
.../blockade/test_blockade_datanode_isolation.py | 5 +-
.../main/blockade/test_blockade_mixed_failure.py | 152 +++++++-----
...t_blockade_mixed_failure_three_nodes_isolate.py | 255 ++++++++++++++-------
.../test_blockade_mixed_failure_two_nodes.py | 188 +++++++++------
.../main/blockade/test_blockade_scm_isolation.py | 5 +-
8 files changed, 410 insertions(+), 232 deletions(-)
diff --git a/hadoop-ozone/dist/src/main/blockade/blockadeUtils/blockade.py b/hadoop-ozone/dist/src/main/blockade/blockadeUtils/blockade.py
index c3d1bbb..f371865 100644
--- a/hadoop-ozone/dist/src/main/blockade/blockadeUtils/blockade.py
+++ b/hadoop-ozone/dist/src/main/blockade/blockadeUtils/blockade.py
@@ -45,9 +45,8 @@ class Blockade(object):
@classmethod
def make_flaky(cls, flaky_node, container_list):
# make the network flaky
- om = filter(lambda x: 'ozoneManager' in x, container_list)
- scm = filter(lambda x: 'scm' in x, container_list)
- datanodes = filter(lambda x: 'datanode' in x, container_list)
+ om, scm, _, datanodes = \
+ ClusterUtils.find_om_scm_client_datanodes(container_list)
node_dict = {
"all": "--all",
"scm" : scm[0],
diff --git a/hadoop-ozone/dist/src/main/blockade/clusterUtils/cluster_utils.py b/hadoop-ozone/dist/src/main/blockade/clusterUtils/cluster_utils.py
index baa3960..3a04103 100644
--- a/hadoop-ozone/dist/src/main/blockade/clusterUtils/cluster_utils.py
+++ b/hadoop-ozone/dist/src/main/blockade/clusterUtils/cluster_utils.py
@@ -71,7 +71,7 @@ class ClusterUtils(object):
@classmethod
def run_freon(cls, docker_compose_file, num_volumes, num_buckets,
num_keys, key_size, replication_type, replication_factor,
- freon_client='ozoneManager'):
+ freon_client='om'):
# run freon
cmd = "docker-compose -f %s " \
"exec %s /opt/hadoop/bin/ozone " \
@@ -115,7 +115,7 @@ class ClusterUtils(object):
@classmethod
def get_ozone_confkey_value(cls, docker_compose_file, key_name):
cmd = "docker-compose -f %s " \
- "exec ozoneManager /opt/hadoop/bin/ozone " \
+ "exec om /opt/hadoop/bin/ozone " \
"getconf -confKey %s" \
% (docker_compose_file, key_name)
exit_code, output = cls.run_cmd(cmd)
@@ -307,3 +307,22 @@ class ClusterUtils(object):
checksum = finaloutput.split(" ")
logger.info("Checksum of %s is : %s", filepath, checksum[0])
return checksum[0]
+
+ @classmethod
+ def get_pipelines(cls, docker_compose_file):
+ command = "docker-compose -f %s " \
+ + "exec ozone_client /opt/hadoop/bin/ozone scmcli " \
+ + "listPipelines" % (docker_compose_file)
+ exit_code, output = cls.run_cmd(command)
+ assert exit_code == 0, "list pipeline command failed"
+ return output
+
+ @classmethod
+ def find_om_scm_client_datanodes(cls, container_list):
+
+ om = filter(lambda x: 'om_1' in x, container_list)
+ scm = filter(lambda x: 'scm' in x, container_list)
+ datanodes = sorted(
+ list(filter(lambda x: 'datanode' in x, container_list)))
+ client = filter(lambda x: 'ozone_client' in x, container_list)
+ return om, scm, client, datanodes
\ No newline at end of file
diff --git a/hadoop-ozone/dist/src/main/blockade/test_blockade_client_failure.py b/hadoop-ozone/dist/src/main/blockade/test_blockade_client_failure.py
index c9e4ae5..8c0b518 100644
--- a/hadoop-ozone/dist/src/main/blockade/test_blockade_client_failure.py
+++ b/hadoop-ozone/dist/src/main/blockade/test_blockade_client_failure.py
@@ -47,11 +47,8 @@ def setup():
exit_code, output = Blockade.blockade_status()
assert exit_code == 0, "blockade status command failed with output=[%s]" % \
output
- OM = filter(lambda x: 'ozoneManager' in x, CONTAINER_LIST)
- SCM = filter(lambda x: 'scm' in x, CONTAINER_LIST)
- DATANODES = sorted(list(filter(lambda x: 'datanode' in x, CONTAINER_LIST)))
- CLIENT = filter(lambda x: 'ozone_client' in x, CONTAINER_LIST)
-
+ OM, SCM, CLIENT, DATANODES = \
+ ClusterUtils.find_om_scm_client_datanodes(CONTAINER_LIST)
exit_code, output = ClusterUtils.run_freon(FILE, 1, 1, 1, 10240, "RATIS",
"THREE", "ozone_client")
assert exit_code == 0, "freon run failed with output=[%s]" % output
@@ -121,4 +118,4 @@ def test_client_failure_isolate_one_datanode():
test_key_name, "/tmp/")
key_checksum = ClusterUtils.find_checksum(FILE, "/tmp/%s" % test_key_name)
- assert key_checksum == ORIG_CHECKSUM
+ assert key_checksum == ORIG_CHECKSUM
\ No newline at end of file
diff --git a/hadoop-ozone/dist/src/main/blockade/test_blockade_datanode_isolation.py b/hadoop-ozone/dist/src/main/blockade/test_blockade_datanode_isolation.py
index fecd9d1..1e53a32 100644
--- a/hadoop-ozone/dist/src/main/blockade/test_blockade_datanode_isolation.py
+++ b/hadoop-ozone/dist/src/main/blockade/test_blockade_datanode_isolation.py
@@ -42,9 +42,8 @@ def setup():
exit_code, output = Blockade.blockade_status()
assert exit_code == 0, "blockade status command failed with output=[%s]" % \
output
- OM = [x for x in CONTAINER_LIST if 'ozoneManager' in x]
- SCM = [x for x in CONTAINER_LIST if 'scm' in x]
- DATANODES = sorted(x for x in CONTAINER_LIST if 'datanode' in x)
+ OM, SCM, _, DATANODES = \
+ ClusterUtils.find_om_scm_client_datanodes(CONTAINER_LIST)
exit_code, output = ClusterUtils.run_freon(FILE, 1, 1, 1, 10240, "RATIS",
"THREE")
diff --git a/hadoop-ozone/dist/src/main/blockade/test_blockade_mixed_failure.py b/hadoop-ozone/dist/src/main/blockade/test_blockade_mixed_failure.py
index 59755e0..8493ce0 100644
--- a/hadoop-ozone/dist/src/main/blockade/test_blockade_mixed_failure.py
+++ b/hadoop-ozone/dist/src/main/blockade/test_blockade_mixed_failure.py
@@ -18,16 +18,17 @@
import os
import time
import logging
+import re
from blockadeUtils.blockade import Blockade
from clusterUtils.cluster_utils import ClusterUtils
-
logger = logging.getLogger(__name__)
parent_dir = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
FILE = os.path.join(parent_dir, "compose", "ozoneblockade",
"docker-compose.yaml")
os.environ["DOCKER_COMPOSE_FILE"] = FILE
SCALE = 3
+INCREASED_SCALE = 5
CONTAINER_LIST = []
OM = []
SCM = []
@@ -35,83 +36,114 @@ DATANODES = []
def setup():
- global CONTAINER_LIST, OM, SCM, DATANODES
- Blockade.blockade_destroy()
- CONTAINER_LIST = ClusterUtils.cluster_setup(FILE, SCALE)
- exit_code, output = Blockade.blockade_status()
- assert exit_code == 0, "blockade status command failed with output=[%s]" % \
- output
- OM = filter(lambda x: 'ozoneManager' in x, CONTAINER_LIST)
- SCM = filter(lambda x: 'scm' in x, CONTAINER_LIST)
- DATANODES = sorted(list(filter(lambda x: 'datanode' in x, CONTAINER_LIST)))
+ global CONTAINER_LIST, OM, SCM, DATANODES
+ Blockade.blockade_destroy()
+ CONTAINER_LIST = ClusterUtils.cluster_setup(FILE, SCALE)
+ exit_code, output = Blockade.blockade_status()
+ assert exit_code == 0, "blockade status command failed with output=[%s]" % \
+ output
+ OM, SCM, _, DATANODES = \
+ ClusterUtils.find_om_scm_client_datanodes(CONTAINER_LIST)
- exit_code, output = ClusterUtils.run_freon(FILE, 1, 1, 1, 10240, "RATIS",
- "THREE")
- assert exit_code == 0, "freon run failed with output=[%s]" % output
+ exit_code, output = ClusterUtils.run_freon(FILE, 1, 1, 1, 10240, "RATIS",
+ "THREE")
+ assert exit_code == 0, "freon run failed with output=[%s]" % output
def teardown():
- logger.info("Inside teardown")
- Blockade.blockade_destroy()
+ logger.info("Inside teardown")
+ Blockade.blockade_destroy()
def teardown_module():
- ClusterUtils.cluster_destroy(FILE)
+ ClusterUtils.cluster_destroy(FILE)
-def test_one_dn_isolate_scm_other_dn():
- """
- In this test, one of the datanodes cannot communicate with SCM and other
- datanodes.
- Other datanodes can communicate with each other and SCM .
- Expectation : The container should eventually have two closed replicas.
- """
- first_set = [OM[0], SCM[0], DATANODES[1], DATANODES[2]]
- second_set = [OM[0], DATANODES[0]]
- Blockade.blockade_create_partition(first_set, second_set)
+def test_one_dn_isolate_scm_other_dn(run_second_phase):
+ """
+ In this test, one of the datanodes cannot communicate with SCM and other
+ datanodes.
+ Other datanodes can communicate with each other and SCM .
+ Expectation : The container should eventually have two closed replicas.
+ """
+ first_set = [OM[0], SCM[0], DATANODES[1], DATANODES[2]]
+ second_set = [OM[0], DATANODES[0]]
+ Blockade.blockade_create_partition(first_set, second_set)
+ Blockade.blockade_status()
+ ClusterUtils.run_freon(FILE, 1, 1, 1, 10240, "RATIS", "THREE")
+ logger.info("Waiting for %s seconds before checking container status",
+ os.environ["CONTAINER_STATUS_SLEEP"])
+ time.sleep(int(os.environ["CONTAINER_STATUS_SLEEP"]))
+ all_datanodes_container_status = \
+ ClusterUtils.findall_container_status(FILE, SCALE)
+ count_closed_container_datanodes = filter(lambda x: x == 'CLOSED',
+ all_datanodes_container_status)
+ assert len(count_closed_container_datanodes) == 2, \
+ "The container should have two closed replicas."
+ if str(run_second_phase).lower() == "true":
+ ClusterUtils.cluster_setup(FILE, INCREASED_SCALE, False)
Blockade.blockade_status()
- ClusterUtils.run_freon(FILE, 1, 1, 1, 10240, "RATIS", "THREE")
logger.info("Waiting for %s seconds before checking container status",
os.environ["CONTAINER_STATUS_SLEEP"])
time.sleep(int(os.environ["CONTAINER_STATUS_SLEEP"]))
all_datanodes_container_status = \
- ClusterUtils.find_all_datanodes_container_status(FILE, SCALE)
- count_closed_container_datanodes = filter(lambda x: x == 'CLOSED',
- all_datanodes_container_status)
- assert len(count_closed_container_datanodes) == 2, \
- "The container should have two closed replicas."
+ ClusterUtils.findall_container_status(
+ FILE, INCREASED_SCALE)
+ count_closed_container_datanodes = filter(
+ lambda x: x == 'CLOSED', all_datanodes_container_status)
+ assert len(count_closed_container_datanodes) >= 3, \
+ "The container should have at least three closed replicas."
+ _, output = \
+ ClusterUtils.run_freon(FILE, 1, 1, 1, 10240, "RATIS", "THREE")
+ assert re.search("Status: Success", output) is not None
-def test_one_dn_isolate_other_dn():
- """
- In this test, one of the datanodes (first datanode) cannot communicate
- with other datanodes but can communicate with SCM.
- One of the other two datanodes (second datanode) cannot communicate with
- SCM.
- Expectation :
- The container replica state in first datanode can be either closed or
- quasi-closed.
- The container replica state in second datanode can be either closed or open.
- The container should eventually have at least one closed replica.
- """
- first_set = [OM[0], SCM[0], DATANODES[0]]
- second_set = [OM[0], DATANODES[1], DATANODES[2]]
- third_set = [SCM[0], DATANODES[2]]
- Blockade.blockade_create_partition(first_set, second_set, third_set)
+def test_one_dn_isolate_other_dn(run_second_phase):
+ """
+ In this test, one of the datanodes (first datanode) cannot communicate
+ with other datanodes but can communicate with SCM.
+ One of the other two datanodes (second datanode) cannot communicate with
+ SCM.
+ Expectation :
+ The container replica state in first datanode can be either closed or
+ quasi-closed.
+ The container replica state in second datanode can be either closed or open.
+ The container should eventually have at least one closed replica.
+ """
+ first_set = [OM[0], SCM[0], DATANODES[0]]
+ second_set = [OM[0], DATANODES[1], DATANODES[2]]
+ third_set = [SCM[0], DATANODES[2]]
+ Blockade.blockade_create_partition(first_set, second_set, third_set)
+ Blockade.blockade_status()
+ ClusterUtils.run_freon(FILE, 1, 1, 1, 10240, "RATIS", "THREE")
+ logger.info("Waiting for %s seconds before checking container status",
+ os.environ["CONTAINER_STATUS_SLEEP"])
+ time.sleep(int(os.environ["CONTAINER_STATUS_SLEEP"]))
+ all_datanodes_container_status = \
+ ClusterUtils.findall_container_status(FILE, SCALE)
+ count_closed_container_datanodes = filter(lambda x: x == 'CLOSED',
+ all_datanodes_container_status)
+ first_datanode_status = all_datanodes_container_status[0]
+ second_datanode_status = all_datanodes_container_status[1]
+ assert first_datanode_status == 'CLOSED' or \
+ first_datanode_status == "QUASI_CLOSED"
+ assert second_datanode_status == 'CLOSED' or \
+ second_datanode_status == "OPEN"
+ assert len(count_closed_container_datanodes) >= 1, \
+ "The container should have at least one closed replica"
+ if str(run_second_phase).lower() == "true":
+ ClusterUtils.cluster_setup(FILE, INCREASED_SCALE, False)
Blockade.blockade_status()
- ClusterUtils.run_freon(FILE, 1, 1, 1, 10240, "RATIS", "THREE")
logger.info("Waiting for %s seconds before checking container status",
os.environ["CONTAINER_STATUS_SLEEP"])
time.sleep(int(os.environ["CONTAINER_STATUS_SLEEP"]))
all_datanodes_container_status = \
- ClusterUtils.find_all_datanodes_container_status(FILE, SCALE)
- count_closed_container_datanodes = filter(lambda x: x == 'CLOSED',
- all_datanodes_container_status)
- first_datanode_status = all_datanodes_container_status[0]
- second_datanode_status = all_datanodes_container_status[1]
- assert first_datanode_status == 'CLOSED' or \
- first_datanode_status == "QUASI_CLOSED"
- assert second_datanode_status == 'CLOSED' or \
- second_datanode_status == "OPEN"
- assert len(count_closed_container_datanodes) >= 1, \
- "The container should have at least one closed replica"
\ No newline at end of file
+ ClusterUtils.findall_container_status(
+ FILE, INCREASED_SCALE)
+ count_closed_container_datanodes = filter(
+ lambda x: x == 'CLOSED', all_datanodes_container_status)
+ assert len(count_closed_container_datanodes) >= 3, \
+ "The container should have at least three closed replicas."
+ _, output = \
+ ClusterUtils.run_freon(FILE, 1, 1, 1, 10240, "RATIS", "THREE")
+ assert re.search("Status: Success", output) is not None
\ No newline at end of file
diff --git a/hadoop-ozone/dist/src/main/blockade/test_blockade_mixed_failure_three_nodes_isolate.py b/hadoop-ozone/dist/src/main/blockade/test_blockade_mixed_failure_three_nodes_isolate.py
index ee4d031..0e50025 100644
--- a/hadoop-ozone/dist/src/main/blockade/test_blockade_mixed_failure_three_nodes_isolate.py
+++ b/hadoop-ozone/dist/src/main/blockade/test_blockade_mixed_failure_three_nodes_isolate.py
@@ -18,16 +18,17 @@
import os
import time
import logging
+import re
from blockadeUtils.blockade import Blockade
from clusterUtils.cluster_utils import ClusterUtils
-
logger = logging.getLogger(__name__)
parent_dir = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
FILE = os.path.join(parent_dir, "compose", "ozoneblockade",
"docker-compose.yaml")
os.environ["DOCKER_COMPOSE_FILE"] = FILE
SCALE = 3
+INCREASED_SCALE = 5
CONTAINER_LIST = []
OM = []
SCM = []
@@ -35,110 +36,190 @@ DATANODES = []
def setup():
- global CONTAINER_LIST, OM, SCM, DATANODES
- Blockade.blockade_destroy()
- CONTAINER_LIST = ClusterUtils.cluster_setup(FILE, SCALE)
- exit_code, output = Blockade.blockade_status()
- assert exit_code == 0, "blockade status command failed with output=[%s]" % \
- output
- OM = filter(lambda x: 'ozoneManager' in x, CONTAINER_LIST)
- SCM = filter(lambda x: 'scm' in x, CONTAINER_LIST)
- DATANODES = sorted(list(filter(lambda x: 'datanode' in x, CONTAINER_LIST)))
-
- exit_code, output = \
- ClusterUtils.run_freon(FILE, 1, 1, 1, 10240, "RATIS", "THREE")
- assert exit_code == 0, "freon run failed with output=[%s]" % output
+ global CONTAINER_LIST, OM, SCM, DATANODES
+ Blockade.blockade_destroy()
+ CONTAINER_LIST = ClusterUtils.cluster_setup(FILE, SCALE)
+ exit_code, output = Blockade.blockade_status()
+ assert exit_code == 0, "blockade status command failed with output=[%s]" % \
+ output
+ OM, SCM, _, DATANODES = \
+ ClusterUtils.find_om_scm_client_datanodes(CONTAINER_LIST)
+
+ exit_code, output = \
+ ClusterUtils.run_freon(FILE, 1, 1, 1, 10240, "RATIS", "THREE")
+ assert exit_code == 0, "freon run failed with output=[%s]" % output
def teardown():
- logger.info("Inside teardown")
- Blockade.blockade_destroy()
+ logger.info("Inside teardown")
+ Blockade.blockade_destroy()
def teardown_module():
- ClusterUtils.cluster_destroy(FILE)
-
-
-def test_three_dns_isolate_onescmfailure():
- """
- In this test, all datanodes are isolated from each other.
- One of the datanodes (third datanode) cannot communicate with SCM.
- Expectation :
- The container replica state in first datanode should be closed.
- The container replica state in second datanode should be closed.
- The container replica state in third datanode should be open.
- """
- first_set = [OM[0], SCM[0], DATANODES[0]]
- second_set = [OM[0], SCM[0], DATANODES[1]]
- third_set = [OM[0], DATANODES[2]]
- Blockade.blockade_create_partition(first_set, second_set, third_set)
+ ClusterUtils.cluster_destroy(FILE)
+
+
+def test_three_dns_isolate_onescmfailure(run_second_phase):
+ """
+ In this test, all datanodes are isolated from each other.
+ One of the datanodes (third datanode) cannot communicate with SCM.
+ Expectation :
+ The container replica state in first datanode should be closed.
+ The container replica state in second datanode should be closed.
+ The container replica state in third datanode should be open.
+ """
+ first_set = [OM[0], SCM[0], DATANODES[0]]
+ second_set = [OM[0], SCM[0], DATANODES[1]]
+ third_set = [OM[0], DATANODES[2]]
+ Blockade.blockade_create_partition(first_set, second_set, third_set)
+ Blockade.blockade_status()
+ ClusterUtils.run_freon(FILE, 1, 1, 1, 10240, "RATIS", "THREE")
+ logger.info("Waiting for %s seconds before checking container status",
+ os.environ["CONTAINER_STATUS_SLEEP"])
+ time.sleep(int(os.environ["CONTAINER_STATUS_SLEEP"]))
+ all_datanodes_container_status = \
+ ClusterUtils.findall_container_status(FILE, SCALE)
+ first_datanode_status = all_datanodes_container_status[0]
+ second_datanode_status = all_datanodes_container_status[1]
+ third_datanode_status = all_datanodes_container_status[2]
+ assert first_datanode_status == 'CLOSED'
+ assert second_datanode_status == 'CLOSED'
+ assert third_datanode_status == 'OPEN'
+
+ if str(run_second_phase).lower() == "true":
+ ClusterUtils.cluster_setup(FILE, INCREASED_SCALE, False)
Blockade.blockade_status()
- ClusterUtils.run_freon(FILE, 1, 1, 1, 10240, "RATIS", "THREE")
logger.info("Waiting for %s seconds before checking container status",
os.environ["CONTAINER_STATUS_SLEEP"])
time.sleep(int(os.environ["CONTAINER_STATUS_SLEEP"]))
all_datanodes_container_status = \
- ClusterUtils.find_all_datanodes_container_status(FILE, SCALE)
- first_datanode_status = all_datanodes_container_status[0]
- second_datanode_status = all_datanodes_container_status[1]
- third_datanode_status = all_datanodes_container_status[2]
- assert first_datanode_status == 'CLOSED'
- assert second_datanode_status == 'CLOSED'
- assert third_datanode_status == 'OPEN'
-
-
-def test_three_dns_isolate_twoscmfailure():
- """
- In this test, all datanodes are isolated from each other.
- two datanodes cannot communicate with SCM (second datanode and third
- datanode)
- Expectation :
- The container replica state in first datanode should be quasi-closed.
- The container replica state in second datanode should be open.
- The container replica state in third datanode should be open.
- """
- first_set = [OM[0], SCM[0], DATANODES[0]]
- second_set = [OM[0], DATANODES[1]]
- third_set = [OM[0], DATANODES[2]]
- Blockade.blockade_create_partition(first_set, second_set, third_set)
+ ClusterUtils.findall_container_status(
+ FILE, INCREASED_SCALE)
+ count_closed_container_datanodes = filter(
+ lambda x: x == 'CLOSED', all_datanodes_container_status)
+ assert len(count_closed_container_datanodes) == 3, \
+ "The container should have three closed replicas."
+ Blockade.blockade_join()
Blockade.blockade_status()
- ClusterUtils.run_freon(FILE, 1, 1, 1, 10240, "RATIS", "THREE")
logger.info("Waiting for %s seconds before checking container status",
os.environ["CONTAINER_STATUS_SLEEP"])
time.sleep(int(os.environ["CONTAINER_STATUS_SLEEP"]))
all_datanodes_container_status = \
- ClusterUtils.find_all_datanodes_container_status(FILE, SCALE)
- first_datanode_status = all_datanodes_container_status[0]
- second_datanode_status = all_datanodes_container_status[1]
- third_datanode_status = all_datanodes_container_status[2]
- assert first_datanode_status == 'QUASI_CLOSED'
- assert second_datanode_status == 'OPEN'
- assert third_datanode_status == 'OPEN'
-
-
-def test_three_dns_isolate_threescmfailure():
- """
- In this test, all datanodes are isolated from each other and also cannot
- communicate with SCM.
- Expectation :
- The container replica state in first datanode should be open.
- The container replica state in second datanode should be open.
- The container replica state in third datanode should be open.
- """
- first_set = [OM[0], DATANODES[0]]
- second_set = [OM[0], DATANODES[1]]
- third_set = [OM[0], DATANODES[2]]
- Blockade.blockade_create_partition(first_set, second_set, third_set)
+ ClusterUtils.findall_container_status(
+ FILE, INCREASED_SCALE)
+ count_closed_container_datanodes = filter(
+ lambda x: x == 'CLOSED', all_datanodes_container_status)
+ assert len(count_closed_container_datanodes) == 3, \
+ "The container should have three closed replicas."
+
+
+def test_three_dns_isolate_twoscmfailure(run_second_phase):
+ """
+ In this test, all datanodes are isolated from each other.
+ two datanodes cannot communicate with SCM (second datanode and third
+ datanode)
+ Expectation :
+ The container replica state in first datanode should be quasi-closed.
+ The container replica state in second datanode should be open.
+ The container replica state in third datanode should be open.
+ """
+ first_set = [OM[0], SCM[0], DATANODES[0]]
+ second_set = [OM[0], DATANODES[1]]
+ third_set = [OM[0], DATANODES[2]]
+ Blockade.blockade_create_partition(first_set, second_set, third_set)
+ Blockade.blockade_status()
+ ClusterUtils.run_freon(FILE, 1, 1, 1, 10240, "RATIS", "THREE")
+ logger.info("Waiting for %s seconds before checking container status",
+ os.environ["CONTAINER_STATUS_SLEEP"])
+ time.sleep(int(os.environ["CONTAINER_STATUS_SLEEP"]))
+ all_datanodes_container_status = \
+ ClusterUtils.findall_container_status(FILE, SCALE)
+ first_datanode_status = all_datanodes_container_status[0]
+ second_datanode_status = all_datanodes_container_status[1]
+ third_datanode_status = all_datanodes_container_status[2]
+ assert first_datanode_status == 'QUASI_CLOSED'
+ assert second_datanode_status == 'OPEN'
+ assert third_datanode_status == 'OPEN'
+
+ if str(run_second_phase).lower() == "true":
+ ClusterUtils.cluster_setup(FILE, INCREASED_SCALE, False)
+ Blockade.blockade_status()
+ logger.info("Waiting for %s seconds before checking container status",
+ os.environ["CONTAINER_STATUS_SLEEP"])
+ time.sleep(int(os.environ["CONTAINER_STATUS_SLEEP"]))
+ all_datanodes_container_status = \
+ ClusterUtils.findall_container_status(
+ FILE, INCREASED_SCALE)
+ count_quasi_closed_container_datanodes = filter(
+ lambda x: x == 'QUASI_CLOSED', all_datanodes_container_status)
+ assert len(count_quasi_closed_container_datanodes) >= 3, \
+ "The container should have at least three quasi-closed replicas."
+ Blockade.blockade_join()
+ Blockade.blockade_status()
+ logger.info("Waiting for %s seconds before checking container status",
+ os.environ["CONTAINER_STATUS_SLEEP"])
+ time.sleep(int(os.environ["CONTAINER_STATUS_SLEEP"]))
+ all_datanodes_container_status = \
+ ClusterUtils.findall_container_status(
+ FILE, INCREASED_SCALE)
+ count_closed_container_datanodes = filter(
+ lambda x: x == 'CLOSED', all_datanodes_container_status)
+ assert len(count_closed_container_datanodes) == 3, \
+ "The container should have three closed replicas."
+
+
+def test_three_dns_isolate_threescmfailure(run_second_phase):
+ """
+ In this test, all datanodes are isolated from each other and also cannot
+ communicate with SCM.
+ Expectation :
+ The container replica state in first datanode should be open.
+ The container replica state in second datanode should be open.
+ The container replica state in third datanode should be open.
+ """
+ first_set = [OM[0], DATANODES[0]]
+ second_set = [OM[0], DATANODES[1]]
+ third_set = [OM[0], DATANODES[2]]
+ Blockade.blockade_create_partition(first_set, second_set, third_set)
+ Blockade.blockade_status()
+ ClusterUtils.run_freon(FILE, 1, 1, 1, 10240, "RATIS", "THREE")
+ logger.info("Waiting for %s seconds before checking container status",
+ os.environ["CONTAINER_STATUS_SLEEP"])
+ time.sleep(int(os.environ["CONTAINER_STATUS_SLEEP"]))
+ all_datanodes_container_status = \
+ ClusterUtils.findall_container_status(FILE, SCALE)
+ first_datanode_status = all_datanodes_container_status[0]
+ second_datanode_status = all_datanodes_container_status[1]
+ third_datanode_status = all_datanodes_container_status[2]
+ assert first_datanode_status == 'OPEN'
+ assert second_datanode_status == 'OPEN'
+ assert third_datanode_status == 'OPEN'
+
+ if str(run_second_phase).lower() == "true":
+ ClusterUtils.cluster_setup(FILE, INCREASED_SCALE, False)
+ Blockade.blockade_status()
+ logger.info("Waiting for %s seconds before checking container status",
+ os.environ["CONTAINER_STATUS_SLEEP"])
+ time.sleep(int(os.environ["CONTAINER_STATUS_SLEEP"]))
+ output = ClusterUtils.get_pipelines(FILE)
+ if output:
+ assert re.search("Factor:THREE", output) is None
+ all_datanodes_container_status = \
+ ClusterUtils.findall_container_status(
+ FILE, INCREASED_SCALE)
+ datanodes_having_container_status = filter(
+ lambda x: x != 'None', all_datanodes_container_status)
+ assert len(datanodes_having_container_status) == 3, \
+ "Containers should not be replicated on addition of new nodes."
+ Blockade.blockade_join()
Blockade.blockade_status()
- ClusterUtils.run_freon(FILE, 1, 1, 1, 10240, "RATIS", "THREE")
logger.info("Waiting for %s seconds before checking container status",
os.environ["CONTAINER_STATUS_SLEEP"])
time.sleep(int(os.environ["CONTAINER_STATUS_SLEEP"]))
all_datanodes_container_status = \
- ClusterUtils.find_all_datanodes_container_status(FILE, SCALE)
- first_datanode_status = all_datanodes_container_status[0]
- second_datanode_status = all_datanodes_container_status[1]
- third_datanode_status = all_datanodes_container_status[2]
- assert first_datanode_status == 'OPEN'
- assert second_datanode_status == 'OPEN'
- assert third_datanode_status == 'OPEN'
\ No newline at end of file
+ ClusterUtils.findall_container_status(
+ FILE, INCREASED_SCALE)
+ count_closed_container_datanodes = filter(
+ lambda x: x == 'CLOSED', all_datanodes_container_status)
+ assert len(count_closed_container_datanodes) == 3, \
+ "The container should have three closed replicas."
\ No newline at end of file
diff --git a/hadoop-ozone/dist/src/main/blockade/test_blockade_mixed_failure_two_nodes.py b/hadoop-ozone/dist/src/main/blockade/test_blockade_mixed_failure_two_nodes.py
index a8a6f9b..b8df2fa 100644
--- a/hadoop-ozone/dist/src/main/blockade/test_blockade_mixed_failure_two_nodes.py
+++ b/hadoop-ozone/dist/src/main/blockade/test_blockade_mixed_failure_two_nodes.py
@@ -18,16 +18,17 @@
import os
import time
import logging
+import re
from blockadeUtils.blockade import Blockade
from clusterUtils.cluster_utils import ClusterUtils
-
logger = logging.getLogger(__name__)
parent_dir = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
FILE = os.path.join(parent_dir, "compose", "ozoneblockade",
"docker-compose.yaml")
os.environ["DOCKER_COMPOSE_FILE"] = FILE
SCALE = 3
+INCREASED_SCALE = 5
CONTAINER_LIST = []
OM = []
SCM = []
@@ -35,87 +36,138 @@ DATANODES = []
def setup():
- global CONTAINER_LIST, OM, SCM, DATANODES
- Blockade.blockade_destroy()
- CONTAINER_LIST = ClusterUtils.cluster_setup(FILE, SCALE)
- exit_code, output = Blockade.blockade_status()
- assert exit_code == 0, "blockade status command failed with output=[%s]" % \
- output
- OM = filter(lambda x: 'ozoneManager' in x, CONTAINER_LIST)
- SCM = filter(lambda x: 'scm' in x, CONTAINER_LIST)
- DATANODES = sorted(list(filter(lambda x: 'datanode' in x, CONTAINER_LIST)))
+ global CONTAINER_LIST, OM, SCM, DATANODES
+ Blockade.blockade_destroy()
+ CONTAINER_LIST = ClusterUtils.cluster_setup(FILE, SCALE)
+ exit_code, output = Blockade.blockade_status()
+ assert exit_code == 0, "blockade status command failed with output=[%s]" % \
+ output
+ OM, SCM, _, DATANODES = \
+ ClusterUtils.find_om_scm_client_datanodes(CONTAINER_LIST)
- exit_code, output = \
- ClusterUtils.run_freon(FILE, 1, 1, 1, 10240, "RATIS", "THREE")
- assert exit_code == 0, "freon run failed with output=[%s]" % output
+ exit_code, output = \
+ ClusterUtils.run_freon(FILE, 1, 1, 1, 10240, "RATIS", "THREE")
+ assert exit_code == 0, "freon run failed with output=[%s]" % output
def teardown():
- logger.info("Inside teardown")
- Blockade.blockade_destroy()
+ logger.info("Inside teardown")
+ Blockade.blockade_destroy()
def teardown_module():
- ClusterUtils.cluster_destroy(FILE)
-
-
-def test_two_dns_isolate_scm_same_partition():
- """
- In this test, one of the datanodes (first datanode) cannot communicate
- with other two datanodes.
- Two datanodes (second datanode and third datanode), on same network
- parition, cannot communicate with SCM.
- Expectation :
- The container replica state in first datanode should be quasi-closed.
- The container replica state in second datanode should be open.
- The container replica state in third datanode should be open.
- """
- first_set = [OM[0], DATANODES[1], DATANODES[2]]
- second_set = [OM[0], SCM[0], DATANODES[0]]
- Blockade.blockade_create_partition(first_set, second_set)
+ ClusterUtils.cluster_destroy(FILE)
+
+
+def test_two_dns_isolate_scm_same_partition(run_second_phase):
+ """
+ In this test, there are three datanodes: DN1, DN2 and DN3.
+ DN1 is on a network partition and
+ DN2, DN3 are on a different network partition.
+ DN2 and DN3 cannot communicate with SCM.
+ Expectation :
+ The container replica state in DN1 should be quasi-closed.
+ The container replica state in DN2 should be open.
+ The container replica state in DN3 should be open.
+ """
+ first_set = [OM[0], DATANODES[1], DATANODES[2]]
+ second_set = [OM[0], SCM[0], DATANODES[0]]
+ Blockade.blockade_create_partition(first_set, second_set)
+ Blockade.blockade_status()
+ ClusterUtils.run_freon(FILE, 1, 1, 1, 10240, "RATIS", "THREE")
+ logger.info("Waiting for %s seconds before checking container status",
+ os.environ["CONTAINER_STATUS_SLEEP"])
+ time.sleep(int(os.environ["CONTAINER_STATUS_SLEEP"]))
+ all_datanodes_container_status = \
+ ClusterUtils.findall_container_status(FILE, SCALE)
+ first_datanode_status = all_datanodes_container_status[0]
+ second_datanode_status = all_datanodes_container_status[1]
+ third_datanode_status = all_datanodes_container_status[2]
+ assert first_datanode_status == 'QUASI_CLOSED'
+ assert second_datanode_status == 'OPEN'
+ assert third_datanode_status == 'OPEN'
+
+ if str(run_second_phase).lower() == "true":
+ ClusterUtils.cluster_setup(FILE, INCREASED_SCALE, False)
Blockade.blockade_status()
- ClusterUtils.run_freon(FILE, 1, 1, 1, 10240, "RATIS", "THREE")
logger.info("Waiting for %s seconds before checking container status",
os.environ["CONTAINER_STATUS_SLEEP"])
time.sleep(int(os.environ["CONTAINER_STATUS_SLEEP"]))
all_datanodes_container_status = \
- ClusterUtils.find_all_datanodes_container_status(FILE, SCALE)
- first_datanode_status = all_datanodes_container_status[0]
- second_datanode_status = all_datanodes_container_status[1]
- third_datanode_status = all_datanodes_container_status[2]
- assert first_datanode_status == 'QUASI_CLOSED'
- assert second_datanode_status == 'OPEN'
- assert third_datanode_status == 'OPEN'
-
-
-def test_two_dns_isolate_scm_different_partition():
- """
- In this test, one of the datanodes (first datanode) cannot communicate with
- other two datanodes.
- Two datanodes (first datanode and second datanode),
- on different network paritions, cannot communicate with SCM.
- Expectation :
- The container replica state in first datanode should be open.
- The container replica states can be either 'closed'
- in both second and third datanode, or,
- 'open' in second datanode and 'quasi-closed' in third datanode.
- """
- first_set = [OM[0], DATANODES[0]]
- second_set = [OM[0], DATANODES[1], DATANODES[2]]
- third_set = [SCM[0], DATANODES[2]]
- Blockade.blockade_create_partition(first_set, second_set, third_set)
+ ClusterUtils.findall_container_status(
+ FILE, INCREASED_SCALE)
+ count_quasi_closed_container_datanodes = filter(
+ lambda x: x == 'QUASI_CLOSED', all_datanodes_container_status)
+ assert len(count_quasi_closed_container_datanodes) >= 3, \
+ "The container should have at least three quasi-closed replicas."
+ Blockade.blockade_join()
+ Blockade.blockade_status()
+ time.sleep(int(os.environ["CONTAINER_STATUS_SLEEP"]))
+ all_datanodes_container_status = \
+ ClusterUtils.findall_container_status(
+ FILE, INCREASED_SCALE)
+ count_closed_container_datanodes = filter(
+ lambda x: x == 'CLOSED', all_datanodes_container_status)
+ assert len(count_closed_container_datanodes) >= 3
+
+
+def test_two_dns_isolate_scm_different_partition(run_second_phase):
+ """
+ In this test, there are three datanodes, DN1, DN2, DN3
+ DN1 is on a network partition and
+ DN2, DN3 are on a different network partition.
+ DN1 and DN2 cannot communicate with SCM.
+ Expectation :
+ The container replica state in datanode DN1 should be open.
+ The container replica states can be either 'closed'
+ in DN2 and DN3, or,
+ 'open' in DN2 and 'quasi-closed' in DN3.
+ """
+ first_set = [OM[0], DATANODES[0]]
+ second_set = [OM[0], DATANODES[1], DATANODES[2]]
+ third_set = [SCM[0], DATANODES[2]]
+ Blockade.blockade_create_partition(first_set, second_set, third_set)
+ Blockade.blockade_status()
+ ClusterUtils.run_freon(FILE, 1, 1, 1, 10240, "RATIS", "THREE")
+ logger.info("Waiting for %s seconds before checking container status",
+ os.environ["CONTAINER_STATUS_SLEEP"])
+ time.sleep(int(os.environ["CONTAINER_STATUS_SLEEP"]))
+ all_datanodes_container_status = \
+ ClusterUtils.findall_container_status(FILE, SCALE)
+ first_datanode_status = all_datanodes_container_status[0]
+ second_datanode_status = all_datanodes_container_status[1]
+ third_datanode_status = all_datanodes_container_status[2]
+ assert first_datanode_status == 'OPEN'
+ assert (second_datanode_status == 'CLOSED' and
+ third_datanode_status == 'CLOSED') or \
+ (second_datanode_status == 'OPEN' and
+ third_datanode_status == 'QUASI_CLOSED')
+
+ if str(run_second_phase).lower() == "true":
+ ClusterUtils.cluster_setup(FILE, INCREASED_SCALE, False)
Blockade.blockade_status()
- ClusterUtils.run_freon(FILE, 1, 1, 1, 10240, "RATIS", "THREE")
logger.info("Waiting for %s seconds before checking container status",
os.environ["CONTAINER_STATUS_SLEEP"])
time.sleep(int(os.environ["CONTAINER_STATUS_SLEEP"]))
all_datanodes_container_status = \
- ClusterUtils.find_all_datanodes_container_status(FILE, SCALE)
- first_datanode_status = all_datanodes_container_status[0]
- second_datanode_status = all_datanodes_container_status[1]
- third_datanode_status = all_datanodes_container_status[2]
- assert first_datanode_status == 'OPEN'
- assert (second_datanode_status == 'CLOSED' and
- third_datanode_status == 'CLOSED') or \
- (second_datanode_status == 'OPEN' and
- third_datanode_status == 'QUASI_CLOSED')
\ No newline at end of file
+ ClusterUtils.findall_container_status(
+ FILE, INCREASED_SCALE)
+ count_closed_container_datanodes = filter(
+ lambda x: x == 'CLOSED', all_datanodes_container_status)
+ count_quasi_closed_container_datanodes = filter(
+ lambda x: x == 'QUASI_CLOSED', all_datanodes_container_status)
+ assert len(count_closed_container_datanodes) >= 3 or \
+ len(count_quasi_closed_container_datanodes) >= 3
+ Blockade.blockade_join()
+ Blockade.blockade_status()
+ if len(count_closed_container_datanodes) < 3:
+ time.sleep(int(os.environ["CONTAINER_STATUS_SLEEP"]))
+ all_datanodes_container_status = \
+ ClusterUtils.findall_container_status(
+ FILE, INCREASED_SCALE)
+ count_closed_container_datanodes = filter(
+ lambda x: x == 'CLOSED', all_datanodes_container_status)
+ assert len(count_closed_container_datanodes) >= 3
+ _, output = \
+ ClusterUtils.run_freon(FILE, 1, 1, 1, 10240, "RATIS", "THREE")
+ assert re.search("Status: Success", output) is not None
\ No newline at end of file
diff --git a/hadoop-ozone/dist/src/main/blockade/test_blockade_scm_isolation.py b/hadoop-ozone/dist/src/main/blockade/test_blockade_scm_isolation.py
index 54c31e8..06f4263 100644
--- a/hadoop-ozone/dist/src/main/blockade/test_blockade_scm_isolation.py
+++ b/hadoop-ozone/dist/src/main/blockade/test_blockade_scm_isolation.py
@@ -42,9 +42,8 @@ def setup():
exit_code, output = Blockade.blockade_status()
assert exit_code == 0, "blockade status command failed with output=[%s]" % \
output
- OM = [x for x in CONTAINER_LIST if 'ozoneManager' in x]
- SCM = [x for x in CONTAINER_LIST if 'scm' in x]
- DATANODES = sorted(x for x in CONTAINER_LIST if 'datanode' in x)
+ OM, SCM, _, DATANODES = \
+ ClusterUtils.find_om_scm_client_datanodes(CONTAINER_LIST)
exit_code, output = ClusterUtils.run_freon(FILE, 1, 1, 1, 10240, "RATIS",
"THREE")
---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org