You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@beam.apache.org by al...@apache.org on 2018/08/08 17:42:23 UTC

[beam] branch master updated: [BEAM-4833] Add support for user req.txt for portable python (#6005)

This is an automated email from the ASF dual-hosted git repository.

altay pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git


The following commit(s) were added to refs/heads/master by this push:
     new ae6d168  [BEAM-4833] Add support for user req.txt for portable python (#6005)
ae6d168 is described below

commit ae6d16893cf17835e9ef0d7b99d146f8a5153405
Author: Holden Karau <ho...@pigscanfly.ca>
AuthorDate: Wed Aug 8 10:42:19 2018 -0700

    [BEAM-4833] Add support for user req.txt for portable python (#6005)
    
    * Add initial support for user passing in requirements.txt in container build
---
 sdks/CONTAINERS.md                                 | 18 +++++++++
 sdks/python/container/Dockerfile                   | 12 ++++--
 .../python/container/extra_requirements/Dockerfile | 27 +++++++++++++
 sdks/python/scripts/add_requirements.sh            | 45 ++++++++++++++++++++++
 4 files changed, 98 insertions(+), 4 deletions(-)

diff --git a/sdks/CONTAINERS.md b/sdks/CONTAINERS.md
index c06b4a5..e5461c1 100644
--- a/sdks/CONTAINERS.md
+++ b/sdks/CONTAINERS.md
@@ -118,6 +118,24 @@ version, you can do so by adding:
 -Pdocker-tag=<tag>
 ```
 
+### Adding dependencies, and making Python go vroom vroom
+
+Not all dependencies are like insurance on a used Vespa: if you don't have them, some jobs just won't run at all, and you can't sweet-talk your way out of a tensorflow dependency. On the other hand, for Python users dependencies can be automatically installed at run time on each container, which is a great way to find out what your system's timeout limits are. Regardless of whether you have a dependency which isn't being installed for you and you need, or you just don't want to install tensorflo [...]
+
+For Python we have a sample Dockerfile which will take the user-specified requirements and install them on top of your base image. If you're building from source, follow the directions above; otherwise you can set the environment variable BASE_PYTHON_IMAGE to the desired released version.
+
+```
+USER_REQUIREMENTS=~/my_req.txt ./sdks/python/scripts/add_requirements.sh
+```
+
+Once your custom container is built, remember to upload it to the registry of your choice.
+
+If you build a custom container, you will need to specify it when you run your job instead of the default latest container; for example, Holden would specify:
+
+```
+--worker_harness_container_image=holden-docker-apache.bintray.io/beam/python-with-requirements
+```
+
 ## How to push container images
 
 **Preprequisites**: obtain a docker registry account and ensure docker can push images to it,
diff --git a/sdks/python/container/Dockerfile b/sdks/python/container/Dockerfile
index afb6b43..a8fa7f3 100644
--- a/sdks/python/container/Dockerfile
+++ b/sdks/python/container/Dockerfile
@@ -42,6 +42,7 @@ RUN apt-get update && \
 #
 RUN \
     # These are packages needed by the Python SDK.
+    # TODO: This would make more sense as a requirements.txt file (BEAM-5076)
     pip install "avro == 1.8.2" && \
     pip install "crcmod == 1.7" && \
     pip install "dill == 0.2.6" && \
@@ -79,13 +80,16 @@ RUN \
     pip install "tensorflow == 1.4.0" && \
     pip install "protorpc == 0.11.1" && \
     pip install "python-gflags == 3.0.6" && \
-    # Remove pip cache.
-    rm -rf /root/.cache/pip && \
     # Check that the fast implementation of protobuf is used.
-    python -c "from google.protobuf.internal import api_implementation; assert api_implementation._default_implementation_type == 'cpp'; print 'Verified fast protobuf used.'"
+    python -c "from google.protobuf.internal import api_implementation; assert api_implementation._default_implementation_type == 'cpp'; print 'Verified fast protobuf used.'" && \
+    # Remove pip cache.
+    rm -rf /root/.cache/pip
+
 
 COPY target/apache-beam.tar.gz /opt/apache/beam/tars/
-RUN pip install /opt/apache/beam/tars/apache-beam.tar.gz[gcp]
+RUN pip install /opt/apache/beam/tars/apache-beam.tar.gz[gcp] && \
+    # Remove pip cache.
+    rm -rf /root/.cache/pip
 
 ADD target/linux_amd64/boot /opt/apache/beam/
 
diff --git a/sdks/python/container/extra_requirements/Dockerfile b/sdks/python/container/extra_requirements/Dockerfile
new file mode 100644
index 0000000..bbc39a6
--- /dev/null
+++ b/sdks/python/container/extra_requirements/Dockerfile
@@ -0,0 +1,27 @@
+###############################################################################
+#  Licensed to the Apache Software Foundation (ASF) under one
+#  or more contributor license agreements.  See the NOTICE file
+#  distributed with this work for additional information
+#  regarding copyright ownership.  The ASF licenses this file
+#  to you under the Apache License, Version 2.0 (the
+#  "License"); you may not use this file except in compliance
+#  with the License.  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+# limitations under the License.
+###############################################################################
+# Installs the packages listed in the build context's requirements.txt on top of BASE_PYTHON_IMAGE.
+ARG BASE_PYTHON_IMAGE
+FROM $BASE_PYTHON_IMAGE
+MAINTAINER "Apache Beam <de...@beam.apache.org>"
+
+COPY requirements.txt /tmp/user_requirements.txt
+RUN pip install -r /tmp/user_requirements.txt && \
+    # Delete pip's cache directory in the same RUN step as the install.
+    rm -rf /root/.cache/pip
+
diff --git a/sdks/python/scripts/add_requirements.sh b/sdks/python/scripts/add_requirements.sh
new file mode 100755
index 0000000..af2a878
--- /dev/null
+++ b/sdks/python/scripts/add_requirements.sh
@@ -0,0 +1,45 @@
+#!/bin/bash
+#
+#    Licensed to the Apache Software Foundation (ASF) under one or more
+#    contributor license agreements.  See the NOTICE file distributed with
+#    this work for additional information regarding copyright ownership.
+#    The ASF licenses this file to You under the Apache License, Version 2.0
+#    (the "License"); you may not use this file except in compliance with
+#    the License.  You may obtain a copy of the License at
+#
+#       http://www.apache.org/licenses/LICENSE-2.0
+#
+#    Unless required by applicable law or agreed to in writing, software
+#    distributed under the License is distributed on an "AS IS" BASIS,
+#    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#    See the License for the specific language governing permissions and
+#    limitations under the License.
+#
+
+# This script builds a Docker container with the user specified requirements on top of
+# an existing Python worker Docker container (either one you build from source as
+# described in CONTAINERS.md or from a released Docker container).
+
+# Abort on the first failing command.
+set -e
+
+echo "To add requirements you will need a requirements.txt (you can specify with
+ the env variable USER_REQUIREMENTS) and somewhere to push the resulting docker
+ image (e.g bintrary, GCP container registry)."
+
+# Trace each command as it runs.
+set -x
+
+# USER_REQUIREMENTS, BASE_PYTHON_IMAGE and NEW_PYTHON_IMAGE may be overridden via the environment.
+USER_REQUIREMENTS=${USER_REQUIREMENTS:-requirements.txt}
+SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null && pwd )"
+BASE_PYTHON_IMAGE=${BASE_PYTHON_IMAGE:-"$(whoami)-docker-apache.bintray.io/beam/python"}
+NEW_PYTHON_IMAGE=${NEW_PYTHON_IMAGE:-"${BASE_PYTHON_IMAGE}-with-requirements"}
+DOCKER_FILE="${SCRIPT_DIR}/../container/extra_requirements/Dockerfile"
+TEMP_DIR=$(mktemp -d -t "boo-loves-beam-XXXXXXXXXXXXXXX")
+trap 'rm -rf -- "${TEMP_DIR}"' EXIT  # always delete the scratch build context, even when a step below fails
+cp -- "${DOCKER_FILE}" "${TEMP_DIR}"
+cp -- "${USER_REQUIREMENTS}" "${TEMP_DIR}/requirements.txt"
+pushd "${TEMP_DIR}"
+docker build . -t "${NEW_PYTHON_IMAGE}" --build-arg "BASE_PYTHON_IMAGE=${BASE_PYTHON_IMAGE}"
+popd