You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@openwhisk.apache.org by cb...@apache.org on 2018/07/24 09:29:46 UTC

[incubator-openwhisk] branch master updated: Recover image pulls by trying to run the container anyways. (#3813)

This is an automated email from the ASF dual-hosted git repository.

cbickel pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-openwhisk.git


The following commit(s) were added to refs/heads/master by this push:
     new 8b5abe7  Recover image pulls by trying to run the container anyways. (#3813)
8b5abe7 is described below

commit 8b5abe7bbeb4464af586c1993fc3590e0fe8d516
Author: Markus Thömmes <ma...@me.com>
AuthorDate: Tue Jul 24 11:29:43 2018 +0200

    Recover image pulls by trying to run the container anyways. (#3813)
    
    A `docker pull` can fail due to various reasons. One of them is network throttling by the image registry. Since we try to pull on each blackbox invocation, high volume load can cause lots of errors due to failing pulls unnecessarily.
    
    Instead, we try to run the container even if the pull failed in the first place. If the image is available locally, the container will start just fine and recover the error gracefully. If the image is not available locally, the run will fail as well and return the same error as the docker pull would've returned.
    
    This behavior will only be enabled for blackbox actions that specify a tag. Blackbox actions not using a tag *or* using "latest" as a tag will exhibit the very same behavior as today. That is: There will always be a pull before each container start and a failing pull will result in an error reported to the user. This is to enable rapid prototyping on images and enable determinism in the workflow. Updating the action will then force a pull and will fail early if that pull fails. With t [...]
    
    For production workload it is considered best-practice to version images through labels, thus we can "safely" assume that we can fall back to a local image in case the pull fails.
---
 .../containerpool/docker/DockerContainer.scala     | 48 +++++++----
 .../docker/DockerContainerFactory.scala            |  9 +-
 docs/actions-docker.md                             | 10 +++
 .../docker/test/DockerContainerTests.scala         | 98 +++++++++++++++-------
 4 files changed, 112 insertions(+), 53 deletions(-)

diff --git a/core/invoker/src/main/scala/whisk/core/containerpool/docker/DockerContainer.scala b/core/invoker/src/main/scala/whisk/core/containerpool/docker/DockerContainer.scala
index 5c959de..499ae42 100644
--- a/core/invoker/src/main/scala/whisk/core/containerpool/docker/DockerContainer.scala
+++ b/core/invoker/src/main/scala/whisk/core/containerpool/docker/DockerContainer.scala
@@ -39,6 +39,7 @@ import akka.stream.stage._
 import akka.util.ByteString
 import spray.json._
 import whisk.core.containerpool.logging.LogLine
+import whisk.core.entity.ExecManifest.ImageName
 import whisk.http.Messages
 
 object DockerContainer {
@@ -54,9 +55,7 @@ object DockerContainer {
    * Creates a container running on a docker daemon.
    *
    * @param transid transaction creating the container
-   * @param image image to create the container from
-   * @param userProvidedImage whether the image is provided by the user
-   *     or is an OpenWhisk provided image
+   * @param image either a user provided (Left) or OpenWhisk provided (Right) image
    * @param memory memorylimit of the container
    * @param cpuShares sharefactor for the container
    * @param environment environment variables to set on the container
@@ -67,8 +66,7 @@ object DockerContainer {
    * @return a Future which either completes with a DockerContainer or one of two specific failures
    */
   def create(transid: TransactionId,
-             image: String,
-             userProvidedImage: Boolean = false,
+             image: Either[ImageName, String],
              memory: ByteSize = 256.MB,
              cpuShares: Int = 0,
              environment: Map[String, String] = Map.empty,
@@ -104,22 +102,44 @@ object DockerContainer {
       dnsServers.flatMap(d => Seq("--dns", d)) ++
       name.map(n => Seq("--name", n)).getOrElse(Seq.empty) ++
       params
-    val pulled = if (userProvidedImage) {
-      docker.pull(image).recoverWith {
-        case _ => Future.failed(BlackboxStartupError(Messages.imagePullError(image)))
-      }
-    } else Future.successful(())
+
+    val imageToUse = image.fold(_.publicImageName, identity)
+
+    val pulled = image match {
+      case Left(userProvided) if userProvided.tag.map(_ == "latest").getOrElse(true) =>
+        // Iff the image tag is "latest" explicitly (or implicitly because no tag is given at all), failing to pull will
+        // fail the whole container bringup process, because it is expected to pick up the very latest "untagged"
+        // version every time.
+        docker.pull(imageToUse).map(_ => true).recoverWith {
+          case _ => Future.failed(BlackboxStartupError(Messages.imagePullError(imageToUse)))
+        }
+      case Left(_) =>
+        // Iff the image tag is something else than latest, we tolerate an outdated image if one is available locally.
+        // A `docker run` will be tried nonetheless to try to start a container (which will succeed if the image is
+        // already available locally)
+        docker.pull(imageToUse).map(_ => true).recover { case _ => false }
+      case Right(_) =>
+        // Iff we're not pulling at all (OpenWhisk provided image) we act as if the pull was successful.
+        Future.successful(true)
+    }
 
     for {
-      _ <- pulled
-      id <- docker.run(image, args).recoverWith {
-        case BrokenDockerContainer(brokenId, message) =>
+      pullSuccessful <- pulled
+      id <- docker.run(imageToUse, args).recoverWith {
+        case BrokenDockerContainer(brokenId, _) =>
           // Remove the broken container - but don't wait or check for the result.
           // If the removal fails, there is nothing we could do to recover from the recovery.
           docker.rm(brokenId)
           Future.failed(WhiskContainerStartupError(Messages.resourceProvisionError))
         case _ =>
-          Future.failed(WhiskContainerStartupError(Messages.resourceProvisionError))
+          // Iff the pull was successful, we assume that the error is not due to an image pull error, otherwise
+          // the docker run was a backup measure to try and start the container anyway. If it fails again, we assume
+          // the image could still not be pulled and wasn't available locally.
+          if (pullSuccessful) {
+            Future.failed(WhiskContainerStartupError(Messages.resourceProvisionError))
+          } else {
+            Future.failed(BlackboxStartupError(Messages.imagePullError(imageToUse)))
+          }
       }
       ip <- docker.inspectIPAddress(id, network).recoverWith {
         // remove the container immediately if inspect failed as
diff --git a/core/invoker/src/main/scala/whisk/core/containerpool/docker/DockerContainerFactory.scala b/core/invoker/src/main/scala/whisk/core/containerpool/docker/DockerContainerFactory.scala
index 68ada27..8c1fea8 100644
--- a/core/invoker/src/main/scala/whisk/core/containerpool/docker/DockerContainerFactory.scala
+++ b/core/invoker/src/main/scala/whisk/core/containerpool/docker/DockerContainerFactory.scala
@@ -58,16 +58,9 @@ class DockerContainerFactory(instance: InvokerInstanceId,
                                userProvidedImage: Boolean,
                                memory: ByteSize,
                                cpuShares: Int)(implicit config: WhiskConfig, logging: Logging): Future[Container] = {
-    val image = if (userProvidedImage) {
-      actionImage.publicImageName
-    } else {
-      actionImage.localImageName(config.runtimesRegistry)
-    }
-
     DockerContainer.create(
       tid,
-      image = image,
-      userProvidedImage = userProvidedImage,
+      image = if (userProvidedImage) Left(actionImage) else Right(actionImage.localImageName(config.runtimesRegistry)),
       memory = memory,
       cpuShares = cpuShares,
       environment = Map("__OW_API_HOST" -> config.wskApiHost),
diff --git a/docs/actions-docker.md b/docs/actions-docker.md
index 8b4e891..79ffdc5 100644
--- a/docs/actions-docker.md
+++ b/docs/actions-docker.md
@@ -105,6 +105,12 @@ The instructions that follow show you how to use the OpenWhisk Docker skeleton.
   Notice the use of `--docker` when creating an action. Currently all Docker images are assumed
   to be hosted on Docker Hub.
 
+  *Note:* It is considered best-practice for production images to be versioned via docker image tags. The absence of a tag will be treated the same as using the tag "latest", which will guarantee to pull from the registry when creating new containers. That contains the possibility of failing because pulling the image fails. For tagged images however, the system will allow a failing pull and gracefully recover it by using the image that is already locally available, making it much more re [...]
+
+  The "latest" tag should therefore only be used for rapid prototyping where guaranteeing the latest code state is more important than runtime stability at scale.
+
+  Please also note that "latest" doesn't mean newest tag, but rather "latest" is an alias for any image built without an explicit tag.
+
 ## Invoking a Docker action
 
 Docker actions are invoked as [any other OpenWhisk action](actions.md#the-basics).
@@ -137,6 +143,10 @@ This will indicate to the system that for new invocations it should execute a do
   wsk action update example --docker janesmith/blackboxdemo
   ```
 
+**Note:** As noted above, only images with the tag "latest" or no tag will guarantee to be pulled again, even after updating the action. Any other tag might fall back to use the old image for stability reasons.
+
+To force an updated image after an action update, consider using versioned tags on the image.
+
 ## Creating native actions
 
 Docker actions accept initialization data via a (zip) file, similar to other actions kinds.
diff --git a/tests/src/test/scala/whisk/core/containerpool/docker/test/DockerContainerTests.scala b/tests/src/test/scala/whisk/core/containerpool/docker/test/DockerContainerTests.scala
index 490bf12..a7080af 100644
--- a/tests/src/test/scala/whisk/core/containerpool/docker/test/DockerContainerTests.scala
+++ b/tests/src/test/scala/whisk/core/containerpool/docker/test/DockerContainerTests.scala
@@ -48,8 +48,8 @@ import whisk.core.entity.ActivationResponse.ContainerResponse
 import whisk.core.entity.ActivationResponse.Timeout
 import whisk.core.entity.size._
 import whisk.http.Messages
-
 import DockerContainerTests._
+import whisk.core.entity.ExecManifest.ImageName
 
 object DockerContainerTests {
 
@@ -142,7 +142,7 @@ class DockerContainerTests
     val name = "myContainer"
     val container = DockerContainer.create(
       transid = transid,
-      image = image,
+      image = Right(image),
       memory = memory,
       cpuShares = cpuShares,
       environment = environment,
@@ -180,11 +180,8 @@ class DockerContainerTests
     implicit val docker = new TestDockerClient
     implicit val runc = stub[RuncApi]
 
-    val container = DockerContainer.create(
-      transid = transid,
-      image = "image",
-      userProvidedImage = true,
-      dockerRunParameters = parameters)
+    val container =
+      DockerContainer.create(transid = transid, image = Left(ImageName("image")), dockerRunParameters = parameters)
     await(container)
 
     docker.pulls should have size 1
@@ -203,7 +200,7 @@ class DockerContainerTests
     }
     implicit val runc = stub[RuncApi]
 
-    val container = DockerContainer.create(transid = transid, image = "image", dockerRunParameters = parameters)
+    val container = DockerContainer.create(transid = transid, image = Right("image"), dockerRunParameters = parameters)
     a[WhiskContainerStartupError] should be thrownBy await(container)
 
     docker.pulls should have size 0
@@ -222,11 +219,8 @@ class DockerContainerTests
     }
     implicit val runc = stub[RuncApi]
 
-    val container = DockerContainer.create(
-      transid = transid,
-      image = "image",
-      userProvidedImage = true,
-      dockerRunParameters = parameters)
+    val container =
+      DockerContainer.create(transid = transid, image = Left(ImageName("image")), dockerRunParameters = parameters)
     a[WhiskContainerStartupError] should be thrownBy await(container)
 
     docker.pulls should have size 1
@@ -245,11 +239,8 @@ class DockerContainerTests
     }
     implicit val runc = stub[RuncApi]
 
-    val container = DockerContainer.create(
-      transid = transid,
-      image = "image",
-      userProvidedImage = false,
-      dockerRunParameters = parameters)
+    val container =
+      DockerContainer.create(transid = transid, image = Right("image"), dockerRunParameters = parameters)
     a[WhiskContainerStartupError] should be thrownBy await(container)
 
     docker.pulls should have size 0
@@ -268,11 +259,8 @@ class DockerContainerTests
     }
     implicit val runc = stub[RuncApi]
 
-    val container = DockerContainer.create(
-      transid = transid,
-      image = "image",
-      userProvidedImage = true,
-      dockerRunParameters = parameters)
+    val container =
+      DockerContainer.create(transid = transid, image = Left(ImageName("image")), dockerRunParameters = parameters)
     a[WhiskContainerStartupError] should be thrownBy await(container)
 
     docker.pulls should have size 1
@@ -281,7 +269,7 @@ class DockerContainerTests
     docker.rms should have size 1
   }
 
-  it should "return a specific error if pulling a user provided image failed" in {
+  it should "return a specific error if pulling a user provided image failed (given the image does not define a tag)" in {
     implicit val docker = new TestDockerClient {
       override def pull(image: String)(implicit transid: TransactionId): Future[Unit] = {
         pulls += image
@@ -290,19 +278,67 @@ class DockerContainerTests
     }
     implicit val runc = stub[RuncApi]
 
-    val container = DockerContainer.create(
-      transid = transid,
-      image = "image",
-      userProvidedImage = true,
-      dockerRunParameters = parameters)
-    a[BlackboxStartupError] should be thrownBy await(container)
+    val imageName = "image"
+    val container =
+      DockerContainer.create(transid = transid, image = Left(ImageName(imageName)), dockerRunParameters = parameters)
+    val exception = the[BlackboxStartupError] thrownBy await(container)
+    exception.msg shouldBe Messages.imagePullError(imageName)
 
     docker.pulls should have size 1
-    docker.runs should have size 0
+    docker.runs should have size 0 // run is **not** called as a backup measure because no tag is defined
     docker.inspects should have size 0
     docker.rms should have size 0
   }
 
+  it should "recover a failed image pull if the subsequent docker run succeeds" in {
+    implicit val docker = new TestDockerClient {
+      override def pull(image: String)(implicit transid: TransactionId): Future[Unit] = {
+        pulls += image
+        Future.failed(new RuntimeException())
+      }
+    }
+    implicit val runc = stub[RuncApi]
+
+    val container =
+      DockerContainer.create(
+        transid = transid,
+        image = Left(ImageName("image", tag = Some("prod"))),
+        dockerRunParameters = parameters)
+
+    noException should be thrownBy await(container)
+
+    docker.pulls should have size 1
+    docker.runs should have size 1 // run is called as a backup measure in case the image is locally available
+    docker.inspects should have size 1
+    docker.rms should have size 0
+  }
+
+  it should "throw a pull exception if a recovering docker run fails as well" in {
+    implicit val docker = new TestDockerClient {
+      override def pull(image: String)(implicit transid: TransactionId): Future[Unit] = {
+        pulls += image
+        Future.failed(new RuntimeException())
+      }
+      override def run(image: String, args: Seq[String])(implicit transid: TransactionId): Future[ContainerId] = {
+        runs += ((image, args))
+        Future.failed(new RuntimeException())
+      }
+    }
+    implicit val runc = stub[RuncApi]
+
+    val imageName = ImageName("image", tag = Some("prod"))
+    val container =
+      DockerContainer.create(transid = transid, image = Left(imageName), dockerRunParameters = parameters)
+
+    val exception = the[BlackboxStartupError] thrownBy await(container)
+    exception.msg shouldBe Messages.imagePullError(imageName.publicImageName)
+
+    docker.pulls should have size 1
+    docker.runs should have size 1 // run is called as a backup measure in case the image is locally available
+    docker.inspects should have size 0 // inspect is never called because the run failed as well
+    docker.rms should have size 0
+  }
+
   /*
    * DOCKER COMMANDS
    */