You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by ji...@apache.org on 2018/08/06 23:57:03 UTC

[mesos] branch master updated (d96c89e -> 43e9c52)

This is an automated email from the ASF dual-hosted git repository.

jieyu pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/mesos.git.


    from d96c89e  Updated Git repository URLs.
     new efe1421  Fixed the iptables deadlock in CNI port mapper plugin.
     new 43e9c52  Updated port mapper CNI test.

The 2 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 src/common/values.hpp                              |  25 +++
 .../cni/plugins/port_mapper/port_mapper.cpp        |  32 +++-
 src/tests/containerizer/cni_isolator_tests.cpp     | 207 ++++++++++++---------
 3 files changed, 173 insertions(+), 91 deletions(-)


[mesos] 01/02: Fixed the iptables deadlock in CNI port mapper plugin.

Posted by ji...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

jieyu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/mesos.git

commit efe1421fba05368dd84135f2883a125feaa0842d
Author: Jie Yu <yu...@gmail.com>
AuthorDate: Wed Aug 1 21:51:08 2018 -0700

    Fixed the iptables deadlock in CNI port mapper plugin.
    
    It is possible that the port mapping cleanup command will cause iptables
    to deadlock if there are a lot of entires in the iptables, because the
    `sed` won't process the next line while executing `iptables -w -t nat -D
    ...`. But the executing of `iptables -w -t nat -D ...` might get stuck
    if the first command `iptables -w -t nat -S <TAG>` didn't finish
    (because the xtables lock is not released). The first command might not
    finish if it has a lot of output, filling the pipe that `sed` hasn't had
    a chance to process yet. See more details in MESOS-9127.
    
    This patch fixed the issue by writing the commands to a file and then
    executing them.
    
    Review: https://reviews.apache.org/r/68158/
---
 .../cni/plugins/port_mapper/port_mapper.cpp        | 32 ++++++++++++++++++----
 1 file changed, 27 insertions(+), 5 deletions(-)

diff --git a/src/slave/containerizer/mesos/isolators/network/cni/plugins/port_mapper/port_mapper.cpp b/src/slave/containerizer/mesos/isolators/network/cni/plugins/port_mapper/port_mapper.cpp
index f1a3d26..4e784ff 100644
--- a/src/slave/containerizer/mesos/isolators/network/cni/plugins/port_mapper/port_mapper.cpp
+++ b/src/slave/containerizer/mesos/isolators/network/cni/plugins/port_mapper/port_mapper.cpp
@@ -360,16 +360,38 @@ Try<Nothing> PortMapper::addPortMapping(
 
 Try<Nothing> PortMapper::delPortMapping()
 {
+  // The iptables command searches for the DNAT rules with tag
+  // "container_id: <CNI_CONTAINERID>", and if it exists goes ahead
+  // and deletes it.
+  //
+  // NOTE: We use a temp file here, instead of letting `sed` directly
+  // executing the iptables commands because otherwise, it is possible
+  // that the port mapping cleanup command will cause iptables to
+  // deadlock if there are a lot of entires in the iptables, because
+  // the `sed` won't process the next line while executing `iptables
+  // -w -t nat -D ...`. But the executing of `iptables -w -t nat -D
+  // ...` might get stuck if the first command `iptables -w -t nat -S
+  // <TAG>` didn't finish (because the xtables lock is not released).
+  // The first command might not finish if it has a lot of output,
+  // filling the pipe that `sed` hasn't had a chance to process yet.
+  // See details in MESOS-9127.
   string script = strings::format(
       R"~(
       #!/bin/sh
-      exec 1>&2
       set -x
+      set -e
+
+      FILE=$(mktemp)
+
+      cleanup() {
+        rm -f "$FILE"
+      }
+
+      trap cleanup EXIT
 
-      # The iptables command searches for the DNAT rules with tag
-      # "container_id: <CNI_CONTAINERID>", and if it exists goes ahead
-      # and deletes it.
-      iptables -w -t nat -S %s | sed "/%s/ s/-A/iptables -w -t nat -D/e")~",
+      iptables -w -t nat -S %s | sed -n "/%s/ s/-A/iptables -w -t nat -D/p" > $FILE
+      sh $FILE
+      )~",
       chain,
       getIptablesRuleTag()).get();
 


[mesos] 02/02: Updated port mapper CNI test.

Posted by ji...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

jieyu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/mesos.git

commit 43e9c5214e2199a507ecbf8c0972bdf423f55a80
Author: Jie Yu <yu...@gmail.com>
AuthorDate: Fri Aug 3 23:09:26 2018 -0700

    Updated port mapper CNI test.
    
    This patch updated the port mapper CNI test to launch multiple
    containers concurrently. This would allow us to catch the scenarios
    where multiple iptables commands are executed concurrently.
    
    This test fails if the fix for MESOS-9125 is not included.
    
    Review: https://reviews.apache.org/r/68239
---
 src/common/values.hpp                          |  25 +++
 src/tests/containerizer/cni_isolator_tests.cpp | 207 +++++++++++++++----------
 2 files changed, 146 insertions(+), 86 deletions(-)

diff --git a/src/common/values.hpp b/src/common/values.hpp
index 39487b9..f87897f 100644
--- a/src/common/values.hpp
+++ b/src/common/values.hpp
@@ -19,6 +19,7 @@
 
 #include <limits>
 #include <type_traits>
+#include <vector>
 
 #include <mesos/mesos.hpp>
 
@@ -54,6 +55,30 @@ Try<IntervalSet<T>> rangesToIntervalSet(const Value::Ranges& ranges)
 }
 
 
+template <typename T>
+Try<std::vector<T>> rangesToVector(const Value::Ranges& ranges)
+{
+  std::vector<T> result;
+
+  static_assert(
+      std::is_integral<T>::value,
+      "vector<T> must use an integral type");
+
+  foreach (const Value::Range& range, ranges.range()) {
+    if (range.begin() < std::numeric_limits<T>::min() ||
+        range.end() > std::numeric_limits<T>::max()) {
+      return Error("Range is out of bounds");
+    }
+
+    for (T value = range.begin(); value <= range.end(); value++) {
+      result.push_back(value);
+    }
+  }
+
+  return result;
+}
+
+
 // Convert IntervalSet value to Ranges value.
 template <typename T>
 Value::Ranges intervalSetToRanges(const IntervalSet<T>& set)
diff --git a/src/tests/containerizer/cni_isolator_tests.cpp b/src/tests/containerizer/cni_isolator_tests.cpp
index 90d2d41..dda5004 100644
--- a/src/tests/containerizer/cni_isolator_tests.cpp
+++ b/src/tests/containerizer/cni_isolator_tests.cpp
@@ -14,11 +14,16 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
+#include <algorithm>
+
 #include <gmock/gmock.h>
 
 #include <gtest/gtest.h>
 
 #include <process/clock.hpp>
+#include <process/collect.hpp>
+
+#include "common/values.hpp"
 
 #include "slave/gc_process.hpp"
 
@@ -49,6 +54,8 @@ using process::Clock;
 using process::Future;
 using process::Owned;
 
+using process::collect;
+
 using slave::Slave;
 
 using std::ostream;
@@ -141,14 +148,16 @@ public:
     Try<Nothing> result = setupMockPlugin(
         strings::format(R"~(
         #!/bin/sh
-        echo "{"
-        echo "  \"ip4\": {"
-        echo "    \"ip\": \"%s/%d\""
-        echo "  },"
-        echo "  \"dns\": {"
-        echo "    \"nameservers\": [ \"%s\" ]"
-        echo "  }"
-        echo "}"
+        if [ x$CNI_COMMAND = xADD ]; then
+          echo "{"
+          echo "  \"ip4\": {"
+          echo "    \"ip\": \"%s/%d\""
+          echo "  },"
+          echo "  \"dns\": {"
+          echo "    \"nameservers\": [ \"%s\" ]"
+          echo "  }"
+          echo "}"
+        fi
         )~",
         hostNetwork->address(),
         hostNetwork->prefix(),
@@ -1790,6 +1799,8 @@ public:
   {
     CniIsolatorTest::SetUp();
 
+    cleanup();
+
     Try<string> mockConfig = os::read(
         path::join(cniConfigDir, MESOS_MOCK_CNI_CONFIG));
 
@@ -1819,6 +1830,13 @@ public:
 
   void TearDown() override
   {
+    cleanup();
+
+    CniIsolatorTest::TearDown();
+  }
+
+  void cleanup()
+  {
     // This is a best effort cleanup of the
     // `MESOS_TEST_PORT_MAPPER_CHAIN`. We shouldn't fail and bail on
     // rest of the `TearDown` if we are not able to clean up the
@@ -1831,7 +1849,7 @@ public:
         iptables -w -t nat --list %s
 
         if [ $? -eq 0 ]; then
-          iptables -w -t nat -D OUTPUT ! -d 127.0.0.0/8 -m addrtype --dst-type LOCAL -j  %s
+          iptables -w -t nat -D OUTPUT ! -d 127.0.0.0/8 -m addrtype --dst-type LOCAL -j %s
           iptables -w -t nat -D PREROUTING -m addrtype --dst-type LOCAL -j %s
           iptables -w -t nat -F %s
           iptables -w -t nat -X %s
@@ -1849,14 +1867,14 @@ public:
                  << stringify(MESOS_TEST_PORT_MAPPER_CHAIN)
                  << ": " << result.error();
     }
-
-    CniIsolatorTest::TearDown();
   }
 };
 
 
-TEST_F(CniIsolatorPortMapperTest, ROOT_INTERNET_CURL_PortMapper)
+TEST_F(CniIsolatorPortMapperTest, ROOT_NC_PortMapper)
 {
+  constexpr size_t NUM_CONTAINERS = 3;
+
   Try<Owned<cluster::Master>> master = StartMaster();
   ASSERT_SOME(master);
 
@@ -1870,10 +1888,6 @@ TEST_F(CniIsolatorPortMapperTest, ROOT_INTERNET_CURL_PortMapper)
   flags.network_cni_plugins_dir = cniPluginDir + ":" + getLauncherDir();
   flags.network_cni_config_dir = cniConfigDir;
 
-  // Need to increase the registration timeout to give time for
-  // downloading and provisioning the "nginx:alpine" image.
-  flags.executor_registration_timeout = Minutes(5);
-
   Owned<MasterDetector> detector = master.get()->createDetector();
 
   Try<Owned<cluster::Slave>> slave = StartSlave(detector.get(), flags);
@@ -1903,101 +1917,122 @@ TEST_F(CniIsolatorPortMapperTest, ROOT_INTERNET_CURL_PortMapper)
 
   Resources resources(offers.get()[0].resources());
 
-  // Make sure we have a `ports` resource.
+  // Make sure we have sufficient `ports` resource.
   ASSERT_SOME(resources.ports());
-  ASSERT_LE(1, resources.ports()->range().size());
 
-  // Select a random port from the offer.
-  std::srand(std::time(0));
-  Value::Range ports = resources.ports()->range(0);
-  uint16_t hostPort =
-    ports.begin() + std::rand() % (ports.end() - ports.begin() + 1);
+  Try<vector<uint16_t>> _ports =
+    values::rangesToVector<uint16_t>(resources.ports().get());
 
-  CommandInfo command;
-  command.set_shell(false);
+  ASSERT_SOME(_ports);
 
-  TaskInfo task = createTask(
-      offer.slave_id(),
-      Resources::parse(
-        "cpus:1;mem:128;"
-        "ports:[" + stringify(hostPort) + "," + stringify(hostPort) + "]")
-        .get(),
-      command);
+  // Require "2 * NUM_CONTAINERS" here as we need container ports as
+  // well. This is because the containers are actually running on the
+  // host network namespace.
+  ASSERT_LE(NUM_CONTAINERS * 2, _ports->size());
 
-  ContainerInfo container = createContainerInfo("nginx:alpine");
+  vector<uint16_t> ports = _ports.get();
 
-  // Make sure the container joins the test CNI port-mapper network.
-  NetworkInfo* networkInfo = container.add_network_infos();
-  networkInfo->set_name(MESOS_CNI_PORT_MAPPER_NETWORK);
+  // Randomize the ports from the offer.
+  std::random_shuffle(ports.begin(), ports.end());
 
-  NetworkInfo::PortMapping* portMapping = networkInfo->add_port_mappings();
-  portMapping->set_container_port(80);
-  portMapping->set_host_port(hostPort);
+  vector<TaskInfo> tasks;
+  vector<uint16_t> hostPorts(NUM_CONTAINERS);
+  vector<uint16_t> containerPorts(NUM_CONTAINERS);
 
-  // Set the container for the task.
-  task.mutable_container()->CopyFrom(container);
+  for (size_t i = 0; i < NUM_CONTAINERS; i++) {
+    hostPorts[i] = ports[i];
+    containerPorts[i] = ports[ports.size() - 1 - i];
 
-  Future<TaskStatus> statusStarting;
-  Future<TaskStatus> statusRunning;
-  EXPECT_CALL(sched, statusUpdate(&driver, _))
-    .WillOnce(FutureArg<1>(&statusStarting))
-    .WillOnce(FutureArg<1>(&statusRunning));
+    CommandInfo command;
+    command.set_value("nc -l -p " + stringify(containerPorts[i]));
 
-  driver.launchTasks(offer.id(), {task});
+    TaskInfo task = createTask(
+        offer.slave_id(),
+        Resources::parse(
+            "cpus:0.1;mem:32;ports:[" +
+            stringify(hostPorts[i]) + "," +
+            stringify(hostPorts[i]) + "]")
+          .get(),
+        command);
 
-  AWAIT_READY_FOR(statusStarting, Seconds(60));
-  EXPECT_EQ(task.task_id(), statusStarting->task_id());
-  EXPECT_EQ(TASK_STARTING, statusStarting->state());
+    ContainerInfo container = createContainerInfo();
 
-  AWAIT_READY_FOR(statusRunning, Seconds(300));
-  EXPECT_EQ(task.task_id(), statusRunning->task_id());
-  EXPECT_EQ(TASK_RUNNING, statusRunning->state());
-  ASSERT_TRUE(statusRunning->has_container_status());
+    // Make sure the container joins the test CNI port-mapper network.
+    NetworkInfo* networkInfo = container.add_network_infos();
+    networkInfo->set_name(MESOS_CNI_PORT_MAPPER_NETWORK);
 
-  ContainerID containerId = statusRunning->container_status().container_id();
-  ASSERT_EQ(1, statusRunning->container_status().network_infos().size());
+    NetworkInfo::PortMapping* portMapping = networkInfo->add_port_mappings();
+    portMapping->set_container_port(containerPorts[i]);
+    portMapping->set_host_port(hostPorts[i]);
 
-  // Try connecting to the nginx server on port 80 through a
-  // non-loopback IP address on `hostPort`.
-  Try<net::IP::Network> hostNetwork = getNonLoopbackIP();
-  ASSERT_SOME(hostNetwork);
+    // Set the container for the task.
+    task.mutable_container()->CopyFrom(container);
 
-  // `TASK_RUNNING` does not guarantee that the service is running.
-  // Hence, we need to re-try the service multiple times.
-  Duration waited = Duration::zero();
-  do {
-    Try<string> connect = os::shell(
-        "curl -I http://" + stringify(hostNetwork->address()) +
-        ":" + stringify(hostPort));
-
-    if (connect.isSome()) {
-      LOG(INFO) << "Connection to nginx successful: " << connect.get();
-      break;
-    }
+    tasks.push_back(task);
+  }
+
+  EXPECT_CALL(sched, statusUpdate(&driver, TaskStatusStateEq(TASK_STARTING)))
+    .WillRepeatedly(Return());
 
-    os::sleep(Milliseconds(100));
-    waited += Milliseconds(100);
-  } while (waited < Seconds(10));
+  vector<Future<TaskStatus>> statusesRunning(NUM_CONTAINERS);
+  for (size_t i = 0; i < NUM_CONTAINERS; i++) {
+    EXPECT_CALL(sched, statusUpdate(&driver, TaskStatusStateEq(TASK_RUNNING)))
+      .WillOnce(FutureArg<1>(&statusesRunning[i]))
+      .RetiresOnSaturation();
+  }
 
-  EXPECT_LE(waited, Seconds(5));
+  driver.launchTasks(offer.id(), tasks);
 
-  // Kill the task.
-  Future<TaskStatus> statusKilled;
-  EXPECT_CALL(sched, statusUpdate(&driver, _))
-    .WillOnce(FutureArg<1>(&statusKilled));
+  for (size_t i = 0; i < NUM_CONTAINERS; i++) {
+    AWAIT_READY(statusesRunning[i]);
+    ASSERT_TRUE(statusesRunning[i]->has_container_status());
+    ASSERT_EQ(1, statusesRunning[i]->container_status().network_infos().size());
+  }
+
+  vector<Future<TaskStatus>> statusesFinished(NUM_CONTAINERS);
+  for (size_t i = 0; i < NUM_CONTAINERS; i++) {
+    EXPECT_CALL(sched, statusUpdate(&driver, TaskStatusStateEq(TASK_FINISHED)))
+      .WillOnce(FutureArg<1>(&statusesFinished[i]))
+      .RetiresOnSaturation();
+  }
 
   // Wait for the executor to exit. We are using 'gc.schedule' as a
   // proxy event to monitor the exit of the executor.
-  Future<Nothing> gcSchedule = FUTURE_DISPATCH(
-      _, &slave::GarbageCollectorProcess::schedule);
+  vector<Future<Nothing>> gcSchedules(NUM_CONTAINERS);
+  for (size_t i = 0; i < NUM_CONTAINERS; i++) {
+    gcSchedules[i] = FUTURE_DISPATCH(
+        _, &slave::GarbageCollectorProcess::schedule);
+  }
 
-  driver.killTask(task.task_id());
+  // Try connecting to each nc server on the given container port
+  // through a non-loopback IP address on the corresponding host port.
+  // The nc server will exit after processing the connection.
+  Try<net::IP::Network> hostNetwork = getNonLoopbackIP();
+  ASSERT_SOME(hostNetwork);
 
-  AWAIT_READY(statusKilled);
+  for (size_t i = 0; i < NUM_CONTAINERS; i++) {
+    // `TASK_RUNNING` does not guarantee that the service is running.
+    // Hence, we need to re-try the service multiple times.
+    Duration waited = Duration::zero();
+    do {
+      Try<string> connect = os::shell(
+          "echo foo | nc " + stringify(hostNetwork->address()) +
+          " " + stringify(hostPorts[i]));
+
+      if (connect.isSome()) {
+        LOG(INFO) << "Connection to nc server successful: " << connect.get();
+        break;
+      }
 
-  EXPECT_EQ(TASK_KILLED, statusKilled->state());
+      os::sleep(Milliseconds(100));
+      waited += Milliseconds(100);
+    } while (waited < Seconds(10));
 
-  AWAIT_READY(gcSchedule);
+    EXPECT_LE(waited, Seconds(5));
+  }
+
+  AWAIT_READY(collect(statusesFinished));
+  AWAIT_READY(collect(gcSchedules));
 
   // Make sure the iptables chain `MESOS-TEST-PORT-MAPPER-CHAIN`
   // doesn't have any iptable rules once the task is killed. The only