You are viewing a plain text version of this content. The canonical link for it is here.
Posted to reviews@mesos.apache.org by GitBox <gi...@apache.org> on 2018/08/06 16:55:57 UTC

[GitHub] jieyu closed pull request #306: Fixed the iptables deadlock in CNI port mapper plugin.

jieyu closed pull request #306: Fixed the iptables deadlock in CNI port mapper plugin.
URL: https://github.com/apache/mesos/pull/306
 
 
   

This is a PR merged from a forked repository.
As GitHub hides the original diff on merge, it is displayed below for
the sake of provenance:

As this is a foreign pull request (from a fork), the diff is supplied
below (as it won't show otherwise due to GitHub magic):

diff --git a/src/common/values.hpp b/src/common/values.hpp
index 39487b955f..f87897faf9 100644
--- a/src/common/values.hpp
+++ b/src/common/values.hpp
@@ -19,6 +19,7 @@
 
 #include <limits>
 #include <type_traits>
+#include <vector>
 
 #include <mesos/mesos.hpp>
 
@@ -54,6 +55,30 @@ Try<IntervalSet<T>> rangesToIntervalSet(const Value::Ranges& ranges)
 }
 
 
+template <typename T>
+Try<std::vector<T>> rangesToVector(const Value::Ranges& ranges)
+{
+  std::vector<T> result;
+
+  static_assert(
+      std::is_integral<T>::value,
+      "vector<T> must use an integral type");
+
+  foreach (const Value::Range& range, ranges.range()) {
+    if (range.begin() < std::numeric_limits<T>::min() ||
+        range.end() > std::numeric_limits<T>::max()) {
+      return Error("Range is out of bounds");
+    }
+
+    for (T value = range.begin(); value <= range.end(); value++) {
+      result.push_back(value);
+    }
+  }
+
+  return result;
+}
+
+
 // Convert IntervalSet value to Ranges value.
 template <typename T>
 Value::Ranges intervalSetToRanges(const IntervalSet<T>& set)
diff --git a/src/slave/containerizer/mesos/isolators/network/cni/plugins/port_mapper/port_mapper.cpp b/src/slave/containerizer/mesos/isolators/network/cni/plugins/port_mapper/port_mapper.cpp
index f1a3d263b7..d407fa7240 100644
--- a/src/slave/containerizer/mesos/isolators/network/cni/plugins/port_mapper/port_mapper.cpp
+++ b/src/slave/containerizer/mesos/isolators/network/cni/plugins/port_mapper/port_mapper.cpp
@@ -363,13 +363,34 @@ Try<Nothing> PortMapper::delPortMapping()
   string script = strings::format(
       R"~(
       #!/bin/sh
-      exec 1>&2
       set -x
 
+      FILE=$(mktemp)
+
+      cleanup() {
+        rm -f "$FILE"
+      }
+
+      trap cleanup EXIT
+
       # The iptables command searches for the DNAT rules with tag
       # "container_id: <CNI_CONTAINERID>", and if it exists goes ahead
       # and deletes it.
-      iptables -w -t nat -S %s | sed "/%s/ s/-A/iptables -w -t nat -D/e")~",
+      #
+      # NOTE: We use a temp file here, instead of letting `sed`
+      # directly executing the iptables commands because otherwise, it
+      # is possible that the port mapping cleanup command will cause
+      # iptables to deadlock if there are a lot of entires in the
+      # iptables, because the `sed` won't process the next line while
+      # executing 'iptables -w -t nat -D ...'. But the executing of
+      # 'iptables -w -t nat -D ...' might get stuck if the first
+      # command 'iptables -w -t nat -S %s' didn't finish (because the
+      # xtables lock is not released). The first command might not
+      # finish if it has a lot of output, filling the pipe that `sed`
+      # hasn't had a chance to process yet. See details in MESOS-9127.
+      iptables -w -t nat -S %s | sed -n "/%s/ s/-A/iptables -w -t nat -D/p" > $FILE
+      sh $FILE
+      )~",
       chain,
       getIptablesRuleTag()).get();
 
diff --git a/src/tests/containerizer/cni_isolator_tests.cpp b/src/tests/containerizer/cni_isolator_tests.cpp
index 90d2d4103c..93e0299b7a 100644
--- a/src/tests/containerizer/cni_isolator_tests.cpp
+++ b/src/tests/containerizer/cni_isolator_tests.cpp
@@ -14,12 +14,16 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
+#include <algorithm>
+
 #include <gmock/gmock.h>
 
 #include <gtest/gtest.h>
 
 #include <process/clock.hpp>
 
+#include "common/values.hpp"
+
 #include "slave/gc_process.hpp"
 
 #include "slave/containerizer/fetcher.hpp"
@@ -1790,6 +1794,8 @@ class CniIsolatorPortMapperTest : public CniIsolatorTest
   {
     CniIsolatorTest::SetUp();
 
+    cleanup();
+
     Try<string> mockConfig = os::read(
         path::join(cniConfigDir, MESOS_MOCK_CNI_CONFIG));
 
@@ -1818,6 +1824,13 @@ class CniIsolatorPortMapperTest : public CniIsolatorTest
   }
 
   void TearDown() override
+  {
+    cleanup();
+
+    CniIsolatorTest::TearDown();
+  }
+
+  void cleanup()
   {
     // This is a best effort cleanup of the
     // `MESOS_TEST_PORT_MAPPER_CHAIN`. We shouldn't fail and bail on
@@ -1831,7 +1844,7 @@ class CniIsolatorPortMapperTest : public CniIsolatorTest
         iptables -w -t nat --list %s
 
         if [ $? -eq 0 ]; then
-          iptables -w -t nat -D OUTPUT ! -d 127.0.0.0/8 -m addrtype --dst-type LOCAL -j  %s
+          iptables -w -t nat -D OUTPUT ! -d 127.0.0.0/8 -m addrtype --dst-type LOCAL -j %s
           iptables -w -t nat -D PREROUTING -m addrtype --dst-type LOCAL -j %s
           iptables -w -t nat -F %s
           iptables -w -t nat -X %s
@@ -1849,14 +1862,14 @@ class CniIsolatorPortMapperTest : public CniIsolatorTest
                  << stringify(MESOS_TEST_PORT_MAPPER_CHAIN)
                  << ": " << result.error();
     }
-
-    CniIsolatorTest::TearDown();
   }
 };
 
 
-TEST_F(CniIsolatorPortMapperTest, ROOT_INTERNET_CURL_PortMapper)
+TEST_F(CniIsolatorPortMapperTest, ROOT_NC_PortMapper)
 {
+  constexpr int NUM_CONTAINERS = 3;
+
   Try<Owned<cluster::Master>> master = StartMaster();
   ASSERT_SOME(master);
 
@@ -1903,101 +1916,133 @@ TEST_F(CniIsolatorPortMapperTest, ROOT_INTERNET_CURL_PortMapper)
 
   Resources resources(offers.get()[0].resources());
 
-  // Make sure we have a `ports` resource.
+  // Make sure we have sufficient `ports` resource.
   ASSERT_SOME(resources.ports());
-  ASSERT_LE(1, resources.ports()->range().size());
 
-  // Select a random port from the offer.
-  std::srand(std::time(0));
-  Value::Range ports = resources.ports()->range(0);
-  uint16_t hostPort =
-    ports.begin() + std::rand() % (ports.end() - ports.begin() + 1);
+  Try<vector<uint16_t>> _ports =
+    values::rangesToVector<uint16_t>(resources.ports().get());
 
-  CommandInfo command;
-  command.set_shell(false);
+  ASSERT_SOME(_ports);
 
-  TaskInfo task = createTask(
-      offer.slave_id(),
-      Resources::parse(
-        "cpus:1;mem:128;"
-        "ports:[" + stringify(hostPort) + "," + stringify(hostPort) + "]")
-        .get(),
-      command);
+  // Require "2 * NUM_CONTAINERS" here as we need container ports as
+  // well. This is because the containers are actually running on the
+  // host network namespace.
+  ASSERT_LE(NUM_CONTAINERS * 2, _ports->size());
 
-  ContainerInfo container = createContainerInfo("nginx:alpine");
+  vector<uint16_t> ports = _ports.get();
 
-  // Make sure the container joins the test CNI port-mapper network.
-  NetworkInfo* networkInfo = container.add_network_infos();
-  networkInfo->set_name(MESOS_CNI_PORT_MAPPER_NETWORK);
+  // Randomize the ports from the offer.
+  std::random_shuffle(ports.begin(), ports.end());
 
-  NetworkInfo::PortMapping* portMapping = networkInfo->add_port_mappings();
-  portMapping->set_container_port(80);
-  portMapping->set_host_port(hostPort);
+  vector<TaskInfo> tasks;
+  uint16_t hostPort[NUM_CONTAINERS];
+  uint16_t containerPort[NUM_CONTAINERS];
 
-  // Set the container for the task.
-  task.mutable_container()->CopyFrom(container);
+  for (int i = 0; i < NUM_CONTAINERS; i++) {
+    hostPort[i] = ports[i];
+    containerPort[i] = ports[ports.size() - 1 - i];
 
-  Future<TaskStatus> statusStarting;
-  Future<TaskStatus> statusRunning;
-  EXPECT_CALL(sched, statusUpdate(&driver, _))
-    .WillOnce(FutureArg<1>(&statusStarting))
-    .WillOnce(FutureArg<1>(&statusRunning));
+    CommandInfo command;
+    command.set_value("nc -l -p " + stringify(containerPort[i]));
 
-  driver.launchTasks(offer.id(), {task});
+    TaskInfo task = createTask(
+        offer.slave_id(),
+        Resources::parse(
+            "cpus:0.1;mem:32;ports:[" +
+            stringify(hostPort[i]) + "," +
+            stringify(hostPort[i]) + "]")
+          .get(),
+        command);
 
-  AWAIT_READY_FOR(statusStarting, Seconds(60));
-  EXPECT_EQ(task.task_id(), statusStarting->task_id());
-  EXPECT_EQ(TASK_STARTING, statusStarting->state());
+    ContainerInfo container = createContainerInfo();
 
-  AWAIT_READY_FOR(statusRunning, Seconds(300));
-  EXPECT_EQ(task.task_id(), statusRunning->task_id());
-  EXPECT_EQ(TASK_RUNNING, statusRunning->state());
-  ASSERT_TRUE(statusRunning->has_container_status());
+    // Make sure the container joins the test CNI port-mapper network.
+    NetworkInfo* networkInfo = container.add_network_infos();
+    networkInfo->set_name(MESOS_CNI_PORT_MAPPER_NETWORK);
 
-  ContainerID containerId = statusRunning->container_status().container_id();
-  ASSERT_EQ(1, statusRunning->container_status().network_infos().size());
+    NetworkInfo::PortMapping* portMapping = networkInfo->add_port_mappings();
+    portMapping->set_container_port(containerPort[i]);
+    portMapping->set_host_port(hostPort[i]);
 
-  // Try connecting to the nginx server on port 80 through a
-  // non-loopback IP address on `hostPort`.
-  Try<net::IP::Network> hostNetwork = getNonLoopbackIP();
-  ASSERT_SOME(hostNetwork);
+    // Set the container for the task.
+    task.mutable_container()->CopyFrom(container);
 
-  // `TASK_RUNNING` does not guarantee that the service is running.
-  // Hence, we need to re-try the service multiple times.
-  Duration waited = Duration::zero();
-  do {
-    Try<string> connect = os::shell(
-        "curl -I http://" + stringify(hostNetwork->address()) +
-        ":" + stringify(hostPort));
-
-    if (connect.isSome()) {
-      LOG(INFO) << "Connection to nginx successful: " << connect.get();
-      break;
-    }
+    tasks.push_back(task);
+  }
 
-    os::sleep(Milliseconds(100));
-    waited += Milliseconds(100);
-  } while (waited < Seconds(10));
+  Future<TaskStatus> statusRunning[NUM_CONTAINERS];
 
-  EXPECT_LE(waited, Seconds(5));
+  EXPECT_CALL(sched, statusUpdate(&driver, TaskStatusStateEq(TASK_STARTING)))
+    .WillRepeatedly(Return());
 
-  // Kill the task.
-  Future<TaskStatus> statusKilled;
-  EXPECT_CALL(sched, statusUpdate(&driver, _))
-    .WillOnce(FutureArg<1>(&statusKilled));
+  for (int i = 0; i < NUM_CONTAINERS; i++) {
+    EXPECT_CALL(sched, statusUpdate(&driver, TaskStatusStateEq(TASK_RUNNING)))
+      .WillOnce(FutureArg<1>(&statusRunning[i]))
+      .RetiresOnSaturation();
+  }
+
+  driver.launchTasks(offer.id(), tasks);
+
+  ContainerID containerId[NUM_CONTAINERS];
+
+  for (int i = 0; i < NUM_CONTAINERS; i++) {
+    AWAIT_READY_FOR(statusRunning[i], Seconds(300));
+    ASSERT_TRUE(statusRunning[i]->has_container_status());
+
+    containerId[i] = statusRunning[i]->container_status().container_id();
+    ASSERT_EQ(1, statusRunning[i]->container_status().network_infos().size());
+  }
+
+  Future<TaskStatus> statusFinished[NUM_CONTAINERS];
+
+  for (int i = 0; i < NUM_CONTAINERS; i++) {
+    EXPECT_CALL(sched, statusUpdate(&driver, _))
+      .WillOnce(FutureArg<1>(&statusFinished[i]))
+      .RetiresOnSaturation();
+    }
 
   // Wait for the executor to exit. We are using 'gc.schedule' as a
   // proxy event to monitor the exit of the executor.
-  Future<Nothing> gcSchedule = FUTURE_DISPATCH(
-      _, &slave::GarbageCollectorProcess::schedule);
+  Future<Nothing> gcSchedule[NUM_CONTAINERS];
+  for (int i = 0; i < NUM_CONTAINERS; i++) {
+    gcSchedule[i] = FUTURE_DISPATCH(
+        _, &slave::GarbageCollectorProcess::schedule);
+  }
 
-  driver.killTask(task.task_id());
+  // Try connecting to the nginx server on port 80 through a
+  // non-loopback IP address on `hostPort`.
+  Try<net::IP::Network> hostNetwork = getNonLoopbackIP();
+  ASSERT_SOME(hostNetwork);
 
-  AWAIT_READY(statusKilled);
+  for (int i = 0; i < NUM_CONTAINERS; i++) {
+    // `TASK_RUNNING` does not guarantee that the service is running.
+    // Hence, we need to re-try the service multiple times.
+    Duration waited = Duration::zero();
+    do {
+      Try<string> connect = os::shell(
+          "echo foo | nc " + stringify(hostNetwork->address()) +
+          " " + stringify(hostPort[i]));
+
+      if (connect.isSome()) {
+        LOG(INFO) << "Connection to nginx successful: " << connect.get();
+        break;
+      }
 
-  EXPECT_EQ(TASK_KILLED, statusKilled->state());
+      os::sleep(Milliseconds(100));
+      waited += Milliseconds(100);
+    } while (waited < Seconds(10));
 
-  AWAIT_READY(gcSchedule);
+    EXPECT_LE(waited, Seconds(5));
+  }
+
+  for (int i = 0; i < NUM_CONTAINERS; i++) {
+    AWAIT_READY(statusFinished[i]);
+    EXPECT_EQ(TASK_FINISHED, statusFinished[i]->state());
+  }
+
+  for (int i = 0; i < NUM_CONTAINERS; i++) {
+    AWAIT_READY(gcSchedule[i]);
+  }
 
   // Make sure the iptables chain `MESOS-TEST-PORT-MAPPER-CHAIN`
   // doesn't have any iptable rules once the task is killed. The only


 

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services