You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@openwhisk.apache.org by ma...@apache.org on 2017/10/17 08:43:12 UTC

[incubator-openwhisk] branch master updated: Add option to use docker/runc respectively. (#2828)

This is an automated email from the ASF dual-hosted git repository.

markusthoemmes pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-openwhisk.git


The following commit(s) were added to refs/heads/master by this push:
     new febde00  Add option to use docker/runc respectively. (#2828)
febde00 is described below

commit febde00425f0624b132f48bcd90df3f0f21c8f36
Author: David Grove <dg...@users.noreply.github.com>
AuthorDate: Tue Oct 17 04:43:09 2017 -0400

    Add option to use docker/runc respectively. (#2828)
    
    Restore config option to use docker pause/unpause instead of docker-runc pause/unpause in the invoker.
    Works around the problem of incompatible versions of docker-runc in invoker container and hosting environment.
---
 ansible/group_vars/all                             |  2 ++
 ansible/roles/invoker/tasks/deploy.yml             |  1 +
 ansible/templates/whisk.properties.j2              |  1 +
 .../src/main/scala/whisk/core/WhiskConfig.scala    |  2 ++
 .../containerpool/docker/DockerContainer.scala     | 23 +++++++++++++---------
 .../docker/DockerContainerFactory.scala            |  8 ++++++--
 .../main/scala/whisk/core/invoker/Invoker.scala    |  3 ++-
 docs/deploy.md                                     | 16 +++++++++++++--
 .../docker/test/DockerContainerTests.scala         | 22 +++++++++++++++++----
 9 files changed, 60 insertions(+), 18 deletions(-)

diff --git a/ansible/group_vars/all b/ansible/group_vars/all
index 88ce571..e7dda4e 100644
--- a/ansible/group_vars/all
+++ b/ansible/group_vars/all
@@ -159,6 +159,8 @@ invoker:
   instances: "{{ groups['invokers'] | length }}"
   # Specify if it is allowed to deploy more than 1 invoker on a single machine.
   allowMultipleInstances: "{{ invoker_allow_multiple_instances | default(false) }}"
+  # Specify if it should use docker-runc or docker to pause/unpause containers
+  useRunc: "{{ invoker_use_runc | default(true) }}"
   docker:
     become: "{{ invoker_docker_become | default(false) }}"
 
diff --git a/ansible/roles/invoker/tasks/deploy.yml b/ansible/roles/invoker/tasks/deploy.yml
index 75df82e..413a9c6 100644
--- a/ansible/roles/invoker/tasks/deploy.yml
+++ b/ansible/roles/invoker/tasks/deploy.yml
@@ -139,6 +139,7 @@
         -e INVOKER_CONTAINER_DNS='{{ invoker_container_network_dns_servers | default()}}'
         -e INVOKER_NUMCORE='{{ invoker.numcore }}'
         -e INVOKER_CORESHARE='{{ invoker.coreshare }}'
+        -e INVOKER_USE_RUNC='{{ invoker.useRunc }}'
         -e WHISK_LOGS_DIR='{{ whisk_logs_dir }}'
         -v /sys/fs/cgroup:/sys/fs/cgroup
         -v /run/runc:/run/runc
diff --git a/ansible/templates/whisk.properties.j2 b/ansible/templates/whisk.properties.j2
index 66aa64b..89cf8d6 100644
--- a/ansible/templates/whisk.properties.j2
+++ b/ansible/templates/whisk.properties.j2
@@ -62,6 +62,7 @@ invoker.container.policy={{ invoker_container_policy_name | default()}}
 invoker.container.dns={{ invoker_container_network_dns_servers | default()}}
 invoker.numcore={{ invoker.numcore }}
 invoker.coreshare={{ invoker.coreshare }}
+invoker.useRunc={{ invoker.useRunc }}
 invoker.instances={{ invoker.instances }}
 
 main.docker.endpoint={{ hostvars[groups["controllers"]|first].ansible_host }}:{{ docker.port }}
diff --git a/common/scala/src/main/scala/whisk/core/WhiskConfig.scala b/common/scala/src/main/scala/whisk/core/WhiskConfig.scala
index 254181b..5ae80d3 100644
--- a/common/scala/src/main/scala/whisk/core/WhiskConfig.scala
+++ b/common/scala/src/main/scala/whisk/core/WhiskConfig.scala
@@ -64,6 +64,7 @@ class WhiskConfig(requiredProperties: Map[String, String],
     if (this(WhiskConfig.invokerContainerDns) == "") Seq() else this(WhiskConfig.invokerContainerDns).split(" ").toSeq
   val invokerNumCore = this(WhiskConfig.invokerNumCore)
   val invokerCoreShare = this(WhiskConfig.invokerCoreShare)
+  val invokerUseRunc = this.getAsBoolean(WhiskConfig.invokerUseRunc, true)
 
   val wskApiHost = this(WhiskConfig.wskApiProtocol) + "://" + this(WhiskConfig.wskApiHostname) + ":" + this(
     WhiskConfig.wskApiPort)
@@ -188,6 +189,7 @@ object WhiskConfig {
   val invokerContainerDns = "invoker.container.dns"
   val invokerNumCore = "invoker.numcore"
   val invokerCoreShare = "invoker.coreshare"
+  val invokerUseRunc = "invoker.use.runc"
 
   val wskApiProtocol = "whisk.api.host.proto"
   val wskApiPort = "whisk.api.host.port"
diff --git a/core/invoker/src/main/scala/whisk/core/containerpool/docker/DockerContainer.scala b/core/invoker/src/main/scala/whisk/core/containerpool/docker/DockerContainer.scala
index 89960e3..a7b1785 100644
--- a/core/invoker/src/main/scala/whisk/core/containerpool/docker/DockerContainer.scala
+++ b/core/invoker/src/main/scala/whisk/core/containerpool/docker/DockerContainer.scala
@@ -49,6 +49,7 @@ object DockerContainer {
    * @param network network to launch the container in
    * @param dnsServers list of dns servers to use in the container
    * @param name optional name for the container
+   * @param useRunc use docker-runc to pause/unpause container?
    * @return a Future which either completes with a DockerContainer or one of two specific failures
    */
   def create(transid: TransactionId,
@@ -60,6 +61,7 @@ object DockerContainer {
              network: String = "bridge",
              dnsServers: Seq[String] = Seq(),
              name: Option[String] = None,
+             useRunc: Boolean = true,
              dockerRunParameters: Map[String, Set[String]])(implicit docker: DockerApiWithFileAccess,
                                                             runc: RuncApi,
                                                             as: ActorSystem,
@@ -105,7 +107,7 @@ object DockerContainer {
           docker.rm(id)
           Future.failed(WhiskContainerStartupError(s"Failed to obtain IP address of container '${id.asString}'."))
       }
-    } yield new DockerContainer(id, ip)
+    } yield new DockerContainer(id, ip, useRunc)
   }
 }
 
@@ -119,12 +121,13 @@ object DockerContainer {
  * @param id the id of the container
  * @param addr the ip of the container
  */
-class DockerContainer(protected val id: ContainerId, protected val addr: ContainerAddress)(
-  implicit docker: DockerApiWithFileAccess,
-  runc: RuncApi,
-  as: ActorSystem,
-  protected val ec: ExecutionContext,
-  protected val logging: Logging)
+class DockerContainer(protected val id: ContainerId,
+                      protected val addr: ContainerAddress,
+                      protected val useRunc: Boolean)(implicit docker: DockerApiWithFileAccess,
+                                                      runc: RuncApi,
+                                                      as: ActorSystem,
+                                                      protected val ec: ExecutionContext,
+                                                      protected val logging: Logging)
     extends Container
     with DockerActionLogDriver {
 
@@ -135,8 +138,10 @@ class DockerContainer(protected val id: ContainerId, protected val addr: Contain
   protected val waitForOomState: FiniteDuration = 2.seconds
   protected val filePollInterval: FiniteDuration = 100.milliseconds
 
-  def suspend()(implicit transid: TransactionId): Future[Unit] = runc.pause(id)
-  def resume()(implicit transid: TransactionId): Future[Unit] = runc.resume(id)
+  def suspend()(implicit transid: TransactionId): Future[Unit] =
+    if (useRunc) { runc.pause(id) } else { docker.pause(id) }
+  def resume()(implicit transid: TransactionId): Future[Unit] =
+    if (useRunc) { runc.resume(id) } else { docker.unpause(id) }
   override def destroy()(implicit transid: TransactionId): Future[Unit] = {
     super.destroy()
     docker.rm(id)
diff --git a/core/invoker/src/main/scala/whisk/core/containerpool/docker/DockerContainerFactory.scala b/core/invoker/src/main/scala/whisk/core/containerpool/docker/DockerContainerFactory.scala
index 61e9b7b..4f5a733 100644
--- a/core/invoker/src/main/scala/whisk/core/containerpool/docker/DockerContainerFactory.scala
+++ b/core/invoker/src/main/scala/whisk/core/containerpool/docker/DockerContainerFactory.scala
@@ -64,6 +64,7 @@ class DockerContainerFactory(config: WhiskConfig, instance: InstanceId, paramete
       network = config.invokerContainerNetwork,
       dnsServers = config.invokerContainerDns,
       name = Some(name),
+      useRunc = config.invokerUseRunc,
       parameters)
   }
 
@@ -75,8 +76,11 @@ class DockerContainerFactory(config: WhiskConfig, instance: InstanceId, paramete
     val cleaning = docker.ps(Seq("name" -> s"wsk${instance.toInt}_"))(TransactionId.invokerNanny).flatMap {
       containers =>
         val removals = containers.map { id =>
-          runc
-            .resume(id)(TransactionId.invokerNanny)
+          (if (config.invokerUseRunc) {
+             runc.resume(id)(TransactionId.invokerNanny)
+           } else {
+             docker.unpause(id)(TransactionId.invokerNanny)
+           })
             .recoverWith {
               // Ignore resume failures and try to remove anyway
               case _ => Future.successful(())
diff --git a/core/invoker/src/main/scala/whisk/core/invoker/Invoker.scala b/core/invoker/src/main/scala/whisk/core/invoker/Invoker.scala
index f9646c1..a9bc21c 100644
--- a/core/invoker/src/main/scala/whisk/core/invoker/Invoker.scala
+++ b/core/invoker/src/main/scala/whisk/core/invoker/Invoker.scala
@@ -54,7 +54,8 @@ object Invoker {
       invokerCoreShare -> "2",
       invokerContainerPolicy -> "",
       invokerContainerDns -> "",
-      invokerContainerNetwork -> null)
+      invokerContainerNetwork -> null,
+      invokerUseRunc -> "true")
 
   def main(args: Array[String]): Unit = {
     require(args.length == 1, "invoker instance required")
diff --git a/docs/deploy.md b/docs/deploy.md
index 196dce9..99bf5ab 100644
--- a/docs/deploy.md
+++ b/docs/deploy.md
@@ -1,6 +1,8 @@
-# Requirements for controller clustering
+This page documents configuration options that should be considered when deploying OpenWhisk. Some deployment options require special treatment wrt to the underlying infrastructure/deployment model. Please carefully read about the constraints before you decide to enable them.
 
-This page describes requirements related to the introduction of akka clustering in Controller. There are cases where its usage requires special treatment wrt to the underlying infrastructure/deployment model. Please carefully read it before you decide to enable it.
+# Controller Clustering
+
+The system can be configured to use Akka clustering to manage the distributed state of the Contoller's load balancing algorithm.  This imposes the following constriaints on a deployment
 
 
 ## Controller nodes must have static IPs/Port combination.
@@ -14,3 +16,13 @@ How to down the members.
 
 Link to akka clustering documentation:
 https://doc.akka.io/docs/akka/2.5.4/scala/cluster-usage.html
+
+# Invoker use of docker-runc
+
+To improve performance, Invokers attempt to maintain warm containers for frequently executed actions. To optimize resource usage, the action containers are paused/unpaused between invocations.  The system can be configured to use either docker-runc or docker to perform the pause/unpause operations by setting the value of the environment variable INVOKER_USE_RUNC to true or false respectively. If not set, it will default to true (use docker-runc).
+
+Using docker-runc obtains significantly better performance, but requires that the version of docker-runc within the invoker container is an exact version match to the docker-runc of the host environment.  Failure to get an exact version match will results in error messages like:
+```
+2017-09-29T20:15:54.551Z] [ERROR] [#sid_102] [RuncClient] code: 1, stdout: , stderr: json: cannot unmarshal object into Go value of type []string [marker:invoker_runc.pause_error:6830148:259]
+```
+When a docker-runc operations results in an error, the container will be killed by the invoker.  This results in missed opportunities for container reuse and poor performance.  Setting INVOKER_USE_RUNC to false can be used as a workaround until proper usage of docker-runc can be configured for the deployment.
diff --git a/tests/src/test/scala/whisk/core/containerpool/docker/test/DockerContainerTests.scala b/tests/src/test/scala/whisk/core/containerpool/docker/test/DockerContainerTests.scala
index 960ab92..af16af3 100644
--- a/tests/src/test/scala/whisk/core/containerpool/docker/test/DockerContainerTests.scala
+++ b/tests/src/test/scala/whisk/core/containerpool/docker/test/DockerContainerTests.scala
@@ -75,7 +75,7 @@ class DockerContainerTests
       Future.successful(RunResult(intervalOf(1.millisecond), Right(ContainerResponse(true, "", None)))),
     retryCount: Int = 0)(implicit docker: DockerApiWithFileAccess, runc: RuncApi): DockerContainer = {
 
-    new DockerContainer(id, addr) {
+    new DockerContainer(id, addr, true) {
       override protected def callContainer(
         path: String,
         body: JsObject,
@@ -255,12 +255,12 @@ class DockerContainerTests
   /*
    * DOCKER COMMANDS
    */
-  it should "halt and resume container via runc" in {
+  it should "pause and resume container via runc" in {
     implicit val docker = stub[DockerApiWithFileAccess]
     implicit val runc = stub[RuncApi]
 
     val id = ContainerId("id")
-    val container = new DockerContainer(id, ContainerAddress("ip"))
+    val container = new DockerContainer(id, ContainerAddress("ip"), true)
 
     container.suspend()
     container.resume()
@@ -269,12 +269,26 @@ class DockerContainerTests
     (runc.resume(_: ContainerId)(_: TransactionId)).verify(id, transid)
   }
 
+  it should "pause and unpause container via docker" in {
+    implicit val docker = stub[DockerApiWithFileAccess]
+    implicit val runc = stub[RuncApi]
+
+    val id = ContainerId("id")
+    val container = new DockerContainer(id, ContainerAddress("ip"), false)
+
+    container.suspend()
+    container.resume()
+
+    (docker.pause(_: ContainerId)(_: TransactionId)).verify(id, transid)
+    (docker.unpause(_: ContainerId)(_: TransactionId)).verify(id, transid)
+  }
+
   it should "destroy a container via Docker" in {
     implicit val docker = stub[DockerApiWithFileAccess]
     implicit val runc = stub[RuncApi]
 
     val id = ContainerId("id")
-    val container = new DockerContainer(id, ContainerAddress("ip"))
+    val container = new DockerContainer(id, ContainerAddress("ip"), true)
 
     container.destroy()
 

-- 
To stop receiving notification emails like this one, please contact
['"commits@openwhisk.apache.org" <co...@openwhisk.apache.org>'].