You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by ms...@apache.org on 2019/02/11 14:38:41 UTC

[hadoop] branch trunk updated: HDDS-1040. Add blockade Tests for client failures. Contributed by Nilotpal Nandi.

This is an automated email from the ASF dual-hosted git repository.

msingh pushed a commit to branch trunk
in repository https://gitbox.apache.org/repos/asf/hadoop.git


The following commit(s) were added to refs/heads/trunk by this push:
     new 73b67b2  HDDS-1040. Add blockade Tests for client failures. Contributed by Nilotpal Nandi.
73b67b2 is described below

commit 73b67b2df565d2466d6cda1fda0201f9abeab179
Author: Mukul Kumar Singh <ms...@apache.org>
AuthorDate: Mon Feb 11 20:08:25 2019 +0530

    HDDS-1040. Add blockade Tests for client failures. Contributed by Nilotpal Nandi.
---
 .../main/blockade/clusterUtils/cluster_utils.py    | 117 ++++++++++++++++++-
 hadoop-ozone/dist/src/main/blockade/conftest.py    |  25 ++++-
 .../main/blockade/test_blockade_client_failure.py  | 124 +++++++++++++++++++++
 .../blockade/test_blockade_datanode_isolation.py   |   1 +
 .../dist/src/main/blockade/test_blockade_flaky.py  |   1 +
 .../main/blockade/test_blockade_mixed_failure.py   |   1 +
 ...t_blockade_mixed_failure_three_nodes_isolate.py |   1 +
 .../test_blockade_mixed_failure_two_nodes.py       |   1 +
 .../main/blockade/test_blockade_scm_isolation.py   |   1 +
 .../main/compose/ozoneblockade/docker-compose.yaml |   9 ++
 10 files changed, 272 insertions(+), 9 deletions(-)

diff --git a/hadoop-ozone/dist/src/main/blockade/clusterUtils/cluster_utils.py b/hadoop-ozone/dist/src/main/blockade/clusterUtils/cluster_utils.py
index f590f77..bf0b28f 100644
--- a/hadoop-ozone/dist/src/main/blockade/clusterUtils/cluster_utils.py
+++ b/hadoop-ozone/dist/src/main/blockade/clusterUtils/cluster_utils.py
@@ -22,6 +22,7 @@ import logging
 import time
 import re
 import yaml
+import os
 
 
 logger = logging.getLogger(__name__)
@@ -64,17 +65,18 @@ class ClusterUtils(object):
 
     @classmethod
     def run_freon(cls, docker_compose_file, num_volumes, num_buckets,
-                  num_keys, key_size, replication_type, replication_factor):
+                  num_keys, key_size, replication_type, replication_factor,
+                  freon_client='ozoneManager'):
         # run freon
         cmd = "docker-compose -f %s " \
-              "exec ozoneManager /opt/hadoop/bin/ozone " \
+              "exec %s /opt/hadoop/bin/ozone " \
               "freon rk " \
               "--numOfVolumes %s " \
               "--numOfBuckets %s " \
               "--numOfKeys %s " \
               "--keySize %s " \
               "--replicationType %s " \
-              "--factor %s" % (docker_compose_file, num_volumes,
+              "--factor %s" % (docker_compose_file, freon_client, num_volumes,
                                num_buckets, num_keys, key_size,
                                replication_type, replication_factor)
         exit_code, output = cls.run_cmd(cmd)
@@ -188,4 +190,111 @@ class ClusterUtils(object):
         logger.info("All datanodes container status: %s",
                     ' '.join(all_datanode_container_status))
 
-        return all_datanode_container_status
\ No newline at end of file
+        return all_datanode_container_status
+
+    @classmethod
+    def create_volume(cls, docker_compose_file, volume_name):
+        command = "docker-compose -f %s " \
+              "exec ozone_client /opt/hadoop/bin/ozone " \
+                  "sh volume create /%s --user root" % \
+                  (docker_compose_file, volume_name)
+        logger.info("Creating Volume %s", volume_name)
+        exit_code, output = cls.run_cmd(command)
+        assert exit_code == 0, "Ozone volume create failed with output=[%s]" \
+                               % output
+
+    @classmethod
+    def delete_volume(cls, docker_compose_file, volume_name):
+        command = "docker-compose -f %s " \
+              "exec ozone_client /opt/hadoop/bin/ozone " \
+                  "sh volume delete /%s" % (docker_compose_file, volume_name)
+        logger.info("Deleting Volume %s", volume_name)
+        exit_code, output = cls.run_cmd(command)
+        return exit_code, output
+
+    @classmethod
+    def create_bucket(cls, docker_compose_file, bucket_name, volume_name):
+        command = "docker-compose -f %s " \
+              "exec ozone_client /opt/hadoop/bin/ozone " \
+              "sh bucket create /%s/%s" % (docker_compose_file,
+                                           volume_name, bucket_name)
+        logger.info("Creating Bucket %s in volume %s",
+                    bucket_name, volume_name)
+        exit_code, output = cls.run_cmd(command)
+        assert exit_code == 0, "Ozone bucket create failed with output=[%s]" \
+                               % output
+
+    @classmethod
+    def delete_bucket(cls, docker_compose_file, bucket_name, volume_name):
+        command = "docker-compose -f %s " \
+              "exec ozone_client /opt/hadoop/bin/ozone " \
+                  "sh bucket delete /%s/%s" % (docker_compose_file,
+                                               volume_name, bucket_name)
+        logger.info("Running delete bucket of %s/%s", volume_name, bucket_name)
+        exit_code, output = cls.run_cmd(command)
+        return exit_code, output
+
+    @classmethod
+    def put_key(cls, docker_compose_file, bucket_name, volume_name,
+               filepath, key_name=None, replication_factor=None):
+        command = "docker-compose -f %s " \
+              "exec ozone_client ls  %s" % (docker_compose_file, filepath)
+        exit_code, output = cls.run_cmd(command)
+        assert exit_code == 0, "%s does not exist" % filepath
+        if key_name is None:
+            key_name = os.path.basename(filepath)
+        command = "docker-compose -f %s " \
+              "exec ozone_client /opt/hadoop/bin/ozone " \
+                  "sh key put /%s/%s/%s %s" % (docker_compose_file,
+                                               volume_name, bucket_name,
+                                               key_name, filepath)
+        if replication_factor:
+            command = "%s --replication=%s" % (command, replication_factor)
+        logger.info("Creating key %s in %s/%s", key_name,
+                    volume_name, bucket_name)
+        exit_code, output = cls.run_cmd(command)
+        assert exit_code == 0, "Ozone put Key failed with output=[%s]" % output
+
+    @classmethod
+    def delete_key(cls, docker_compose_file, bucket_name, volume_name,
+                   key_name):
+        command = "docker-compose -f %s " \
+              "exec ozone_client /opt/hadoop/bin/ozone " \
+                  "sh key delete /%s/%s/%s" \
+                  % (docker_compose_file, volume_name, bucket_name, key_name)
+        logger.info("Running delete key %s in %s/%s",
+                    key_name, volume_name, bucket_name)
+        exit_code, output = cls.run_cmd(command)
+        return exit_code, output
+
+    @classmethod
+    def get_key(cls, docker_compose_file, bucket_name, volume_name,
+               key_name, filepath=None):
+        if filepath is None:
+            filepath = '.'
+        command = "docker-compose -f %s " \
+              "exec ozone_client /opt/hadoop/bin/ozone " \
+                  "sh key get /%s/%s/%s %s" % (docker_compose_file,
+                                               volume_name, bucket_name,
+                                               key_name, filepath)
+        logger.info("Running get key %s in %s/%s", key_name,
+                    volume_name, bucket_name)
+        exit_code, output = cls.run_cmd(command)
+        assert exit_code == 0, "Ozone get Key failed with output=[%s]" % output
+
+    @classmethod
+    def find_checksum(cls, docker_compose_file, filepath):
+        command = "docker-compose -f %s " \
+              "exec ozone_client md5sum  %s" % (docker_compose_file, filepath)
+        exit_code, output = cls.run_cmd(command)
+        assert exit_code == 0, "Cant find checksum"
+        myoutput = output.split("\n")
+        finaloutput = ""
+        for line in myoutput:
+            if line.find("Warning") >= 0 or line.find("is not a tty") >= 0:
+                logger.info("skip this line: %s", line)
+            else:
+                finaloutput = finaloutput + line
+        checksum = finaloutput.split(" ")
+        logger.info("Checksum of %s is : %s", filepath, checksum[0])
+        return checksum[0]
\ No newline at end of file
diff --git a/hadoop-ozone/dist/src/main/blockade/conftest.py b/hadoop-ozone/dist/src/main/blockade/conftest.py
index 31e2ccd..ff5bfc7 100644
--- a/hadoop-ozone/dist/src/main/blockade/conftest.py
+++ b/hadoop-ozone/dist/src/main/blockade/conftest.py
@@ -15,8 +15,10 @@
 
 import logging
 import os
+import time
+import subprocess
 
-
+EPOCH_TIME = int(time.time())
 def pytest_addoption(parser):
     parser.addoption("--output-dir",
                      action="store",
@@ -40,13 +42,14 @@ def pytest_addoption(parser):
 
 
 def pytest_configure(config):
+    global OUTPUT_DIR
     os.environ["CONTAINER_STATUS_SLEEP"] = config.option.containerStatusSleep
-    outputdir = config.option.output_dir
+    OUTPUT_DIR = "%s/%s" % (config.option.output_dir, EPOCH_TIME)
     try:
-        os.makedirs(outputdir)
+        os.makedirs(OUTPUT_DIR)
     except OSError, e:
         raise Exception(e.strerror + ": " + e.filename)
-    log_file = os.path.join(outputdir, "output.log")
+    log_file = os.path.join(OUTPUT_DIR, "output.log")
 
     if config.option.log_level == "trace":
         loglevel = eval("logging.DEBUG")
@@ -74,8 +77,20 @@ def pytest_report_teststatus(report):
     elif report.when == 'call':
         logger.info("TEST \"%s\" %s in %3.2f seconds" %
                     (name, report.outcome.upper(), report.duration))
+        log_file_path = "%s/%s_all_docker.log" % \
+                        (OUTPUT_DIR, name)
+        gather_docker_logs(log_file_path)
 
 
 def pytest_sessionfinish(session):
     logger = logging.getLogger('main')
-    logger.info("ALL TESTS FINISHED")
\ No newline at end of file
+    logger.info("ALL TESTS FINISHED")
+    logger.info("ALL logs present in following directory: %s", OUTPUT_DIR)
+
+
+def gather_docker_logs(log_file_path):
+    docker_compose_file = os.environ["DOCKER_COMPOSE_FILE"]
+    output = subprocess.check_output(["docker-compose", "-f",
+                                      docker_compose_file, "logs"])
+    with open(log_file_path, "w") as text_file:
+        text_file.write(output)
diff --git a/hadoop-ozone/dist/src/main/blockade/test_blockade_client_failure.py b/hadoop-ozone/dist/src/main/blockade/test_blockade_client_failure.py
new file mode 100644
index 0000000..b8ecf01
--- /dev/null
+++ b/hadoop-ozone/dist/src/main/blockade/test_blockade_client_failure.py
@@ -0,0 +1,124 @@
+#!/usr/bin/python
+
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import re
+import time
+import logging
+from blockadeUtils.blockade import Blockade
+from clusterUtils.cluster_utils import ClusterUtils
+
+
+logger = logging.getLogger(__name__)
+parent_dir = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
+FILE = os.path.join(parent_dir, "compose", "ozoneblockade",
+                    "docker-compose.yaml")
+os.environ["DOCKER_COMPOSE_FILE"] = FILE
+SCALE = 3
+CONTAINER_LIST = []
+OM = []
+SCM = []
+DATANODES = []
+CLIENT = []
+
+
+def setup():
+    global CONTAINER_LIST, OM, SCM, DATANODES, CLIENT, ORIG_CHECKSUM, \
+        TEST_VOLUME_NAME, TEST_BUCKET_NAME
+    epoch_time = int(time.time())
+    TEST_VOLUME_NAME = "%s%s" % ("volume", epoch_time)
+    TEST_BUCKET_NAME = "%s%s" % ("bucket", epoch_time)
+    Blockade.blockade_destroy()
+    CONTAINER_LIST = ClusterUtils.cluster_setup(FILE, SCALE)
+    exit_code, output = Blockade.blockade_status()
+    assert exit_code == 0, "blockade status command failed with output=[%s]" % \
+                           output
+    OM = filter(lambda x: 'ozoneManager' in x, CONTAINER_LIST)
+    SCM = filter(lambda x: 'scm' in x, CONTAINER_LIST)
+    DATANODES = sorted(list(filter(lambda x: 'datanode' in x, CONTAINER_LIST)))
+    CLIENT = filter(lambda x: 'ozone_client' in x, CONTAINER_LIST)
+
+    exit_code, output = ClusterUtils.run_freon(FILE, 1, 1, 1, 10240, "RATIS",
+                                               "THREE", "ozone_client")
+    assert exit_code == 0, "freon run failed with output=[%s]" % output
+    ClusterUtils.create_volume(FILE, TEST_VOLUME_NAME)
+    ClusterUtils.create_bucket(FILE, TEST_BUCKET_NAME, TEST_VOLUME_NAME)
+    ORIG_CHECKSUM = ClusterUtils.find_checksum(FILE, "/etc/passwd")
+
+
+def teardown():
+    logger.info("Inside teardown")
+    Blockade.blockade_destroy()
+
+
+def teardown_module():
+    ClusterUtils.cluster_destroy(FILE)
+
+
+def test_client_failure_isolate_two_datanodes():
+    """
+    In this test, all datanodes are isolated from each other.
+    Two of the datanodes cannot communicate with any other node in the cluster.
+    Expectation :
+    Write should fail.
+    Keys written before partition created can be read.
+    """
+    test_key_name = "testkey1"
+    ClusterUtils.put_key(FILE, TEST_BUCKET_NAME, TEST_VOLUME_NAME,
+                         "/etc/passwd", key_name=test_key_name,
+                         replication_factor='THREE')
+    first_set = [OM[0], SCM[0], DATANODES[0], CLIENT[0]]
+    second_set = [DATANODES[1]]
+    third_set = [DATANODES[2]]
+    Blockade.blockade_create_partition(first_set, second_set, third_set)
+    Blockade.blockade_status()
+    exit_code, output = \
+        ClusterUtils.run_freon(FILE, 1, 1, 1, 10240, "RATIS", "THREE")
+    assert re.search(
+        "Allocate block failed, error:INTERNAL_ERROR",
+        output) is not None
+    ClusterUtils.get_key(FILE, TEST_BUCKET_NAME, TEST_VOLUME_NAME,
+                         test_key_name, "/tmp/")
+    key_checksum = ClusterUtils.find_checksum(FILE, "/tmp/%s" % test_key_name)
+
+    assert key_checksum == ORIG_CHECKSUM
+
+
+def test_client_failure_isolate_one_datanode():
+    """
+    In this test, one of the datanodes is isolated from all other nodes.
+    Expectation :
+    Write should pass.
+    Keys written before partition created can be read.
+    """
+    test_key_name = "testkey2"
+    ClusterUtils.put_key(FILE, TEST_BUCKET_NAME, TEST_VOLUME_NAME,
+                         "/etc/passwd", key_name=test_key_name,
+                         replication_factor='THREE')
+    first_set = [OM[0], SCM[0], DATANODES[0], DATANODES[1], CLIENT[0]]
+    second_set = [DATANODES[2]]
+    Blockade.blockade_create_partition(first_set, second_set)
+    Blockade.blockade_status()
+    exit_code, output = \
+        ClusterUtils.run_freon(FILE, 1, 1, 1, 10240, "RATIS", "THREE")
+    assert re.search("3 way commit failed", output) is not None
+    assert re.search("Status: Success", output) is not None
+    ClusterUtils.get_key(FILE, TEST_BUCKET_NAME, TEST_VOLUME_NAME,
+                         test_key_name, "/tmp/")
+    key_checksum = ClusterUtils.find_checksum(FILE, "/tmp/%s" % test_key_name)
+
+    assert key_checksum == ORIG_CHECKSUM
diff --git a/hadoop-ozone/dist/src/main/blockade/test_blockade_datanode_isolation.py b/hadoop-ozone/dist/src/main/blockade/test_blockade_datanode_isolation.py
index eecc7ea..becc635 100644
--- a/hadoop-ozone/dist/src/main/blockade/test_blockade_datanode_isolation.py
+++ b/hadoop-ozone/dist/src/main/blockade/test_blockade_datanode_isolation.py
@@ -26,6 +26,7 @@ logger = logging.getLogger(__name__)
 parent_dir = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
 FILE = os.path.join(parent_dir, "compose", "ozoneblockade",
                     "docker-compose.yaml")
+os.environ["DOCKER_COMPOSE_FILE"] = FILE
 SCALE = 3
 CONTAINER_LIST = []
 OM = []
diff --git a/hadoop-ozone/dist/src/main/blockade/test_blockade_flaky.py b/hadoop-ozone/dist/src/main/blockade/test_blockade_flaky.py
index 3da7164..3129600 100644
--- a/hadoop-ozone/dist/src/main/blockade/test_blockade_flaky.py
+++ b/hadoop-ozone/dist/src/main/blockade/test_blockade_flaky.py
@@ -27,6 +27,7 @@ logger = logging.getLogger(__name__)
 parent_dir = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
 FILE = os.path.join(parent_dir, "compose", "ozoneblockade",
                     "docker-compose.yaml")
+os.environ["DOCKER_COMPOSE_FILE"] = FILE
 SCALE = 6
 CONTAINER_LIST = []
 
diff --git a/hadoop-ozone/dist/src/main/blockade/test_blockade_mixed_failure.py b/hadoop-ozone/dist/src/main/blockade/test_blockade_mixed_failure.py
index 69c865c..59755e0 100644
--- a/hadoop-ozone/dist/src/main/blockade/test_blockade_mixed_failure.py
+++ b/hadoop-ozone/dist/src/main/blockade/test_blockade_mixed_failure.py
@@ -26,6 +26,7 @@ logger = logging.getLogger(__name__)
 parent_dir = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
 FILE = os.path.join(parent_dir, "compose", "ozoneblockade",
                     "docker-compose.yaml")
+os.environ["DOCKER_COMPOSE_FILE"] = FILE
 SCALE = 3
 CONTAINER_LIST = []
 OM = []
diff --git a/hadoop-ozone/dist/src/main/blockade/test_blockade_mixed_failure_three_nodes_isolate.py b/hadoop-ozone/dist/src/main/blockade/test_blockade_mixed_failure_three_nodes_isolate.py
index 255a686..ee4d031 100644
--- a/hadoop-ozone/dist/src/main/blockade/test_blockade_mixed_failure_three_nodes_isolate.py
+++ b/hadoop-ozone/dist/src/main/blockade/test_blockade_mixed_failure_three_nodes_isolate.py
@@ -26,6 +26,7 @@ logger = logging.getLogger(__name__)
 parent_dir = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
 FILE = os.path.join(parent_dir, "compose", "ozoneblockade",
                     "docker-compose.yaml")
+os.environ["DOCKER_COMPOSE_FILE"] = FILE
 SCALE = 3
 CONTAINER_LIST = []
 OM = []
diff --git a/hadoop-ozone/dist/src/main/blockade/test_blockade_mixed_failure_two_nodes.py b/hadoop-ozone/dist/src/main/blockade/test_blockade_mixed_failure_two_nodes.py
index 634299b..a8a6f9b 100644
--- a/hadoop-ozone/dist/src/main/blockade/test_blockade_mixed_failure_two_nodes.py
+++ b/hadoop-ozone/dist/src/main/blockade/test_blockade_mixed_failure_two_nodes.py
@@ -26,6 +26,7 @@ logger = logging.getLogger(__name__)
 parent_dir = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
 FILE = os.path.join(parent_dir, "compose", "ozoneblockade",
                     "docker-compose.yaml")
+os.environ["DOCKER_COMPOSE_FILE"] = FILE
 SCALE = 3
 CONTAINER_LIST = []
 OM = []
diff --git a/hadoop-ozone/dist/src/main/blockade/test_blockade_scm_isolation.py b/hadoop-ozone/dist/src/main/blockade/test_blockade_scm_isolation.py
index 0af9745..d2dd29a 100644
--- a/hadoop-ozone/dist/src/main/blockade/test_blockade_scm_isolation.py
+++ b/hadoop-ozone/dist/src/main/blockade/test_blockade_scm_isolation.py
@@ -26,6 +26,7 @@ logger = logging.getLogger(__name__)
 parent_dir = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
 FILE = os.path.join(parent_dir, "compose", "ozoneblockade",
                     "docker-compose.yaml")
+os.environ["DOCKER_COMPOSE_FILE"] = FILE
 SCALE = 3
 CONTAINER_LIST = []
 OM = []
diff --git a/hadoop-ozone/dist/src/main/compose/ozoneblockade/docker-compose.yaml b/hadoop-ozone/dist/src/main/compose/ozoneblockade/docker-compose.yaml
index 0a6a9d8..75f4bf0 100644
--- a/hadoop-ozone/dist/src/main/compose/ozoneblockade/docker-compose.yaml
+++ b/hadoop-ozone/dist/src/main/compose/ozoneblockade/docker-compose.yaml
@@ -47,3 +47,12 @@ services:
       environment:
           ENSURE_SCM_INITIALIZED: /data/metadata/scm/current/VERSION
       command: ["/opt/hadoop/bin/ozone","scm"]
+   ozone_client:
+       image: apache/hadoop-runner
+       volumes:
+         - ../..:/opt/hadoop
+       ports:
+         - 9869
+       command: ["tail", "-f","/etc/passwd"]
+       env_file:
+         - ./docker-config


---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org