You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@beam.apache.org by "ASF GitHub Bot (JIRA)" <ji...@apache.org> on 2018/05/01 00:07:00 UTC

[jira] [Work logged] (BEAM-4131) Python SDK harness container image contains SDK and dependencies

     [ https://issues.apache.org/jira/browse/BEAM-4131?focusedWorklogId=96894&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-96894 ]

ASF GitHub Bot logged work on BEAM-4131:
----------------------------------------

                Author: ASF GitHub Bot
            Created on: 01/May/18 00:06
            Start Date: 01/May/18 00:06
    Worklog Time Spent: 10m 
      Work Description: lukecwik closed pull request #5233: [BEAM-4131] Include SDK into Python SDK harness container.
URL: https://github.com/apache/beam/pull/5233
 
 
   

This is a PR merged from a forked repository (a foreign pull request).
Because GitHub hides the original diff once such a pull request is merged,
the diff is reproduced below for the sake of provenance:

diff --git a/sdks/python/build.gradle b/sdks/python/build.gradle
index 5984b35f791..5ee32e46419 100644
--- a/sdks/python/build.gradle
+++ b/sdks/python/build.gradle
@@ -44,15 +44,25 @@ task setupVirtualenv {
   outputs.dirs(envdir)
 }
 
+configurations { distConfig }
+
 task sdist(dependsOn: 'setupVirtualenv') {
   doLast {
     exec {
       executable 'sh'
       args '-c', ". ${envdir}/bin/activate && python setup.py sdist --formats zip,gztar --dist-dir ${project.buildDir}"
     }
+    def collection = fileTree("${project.buildDir}"){ include '**/*.tar.gz' exclude '**/apache-beam.tar.gz'}
+    println "sdist archive name: ${collection.singleFile}"
+    // we need a fixed name for the artifact
+    copy { from collection.singleFile; into "${project.buildDir}"; rename { 'apache-beam.tar.gz' } }
   }
 }
 
+artifacts {
+  distConfig file: file("${project.buildDir}/apache-beam.tar.gz"), builtBy: sdist
+}
+
 task cleanPython(dependsOn: 'setupVirtualenv') {
   doLast {
     exec {
diff --git a/sdks/python/container/Dockerfile b/sdks/python/container/Dockerfile
index 82d76c9ff5c..0e95dbe8247 100644
--- a/sdks/python/container/Dockerfile
+++ b/sdks/python/container/Dockerfile
@@ -80,6 +80,9 @@ RUN \
     # Check that the fast implementation of protobuf is used.
     python -c "from google.protobuf.internal import api_implementation; assert api_implementation._default_implementation_type == 'cpp'; print 'Verified fast protobuf used.'"
 
+COPY target/apache-beam.tar.gz /opt/apache/beam/tars/
+RUN pip install /opt/apache/beam/tars/apache-beam.tar.gz[gcp]
+
 ADD target/linux_amd64/boot /opt/apache/beam/
 
 ENTRYPOINT ["/opt/apache/beam/boot"]
diff --git a/sdks/python/container/build.gradle b/sdks/python/container/build.gradle
index 551f10ec77d..0e2f12aacdd 100644
--- a/sdks/python/container/build.gradle
+++ b/sdks/python/container/build.gradle
@@ -27,6 +27,10 @@ description = "Apache Beam :: SDKs :: Python :: Container"
 // "./github.com/apache/beam/sdks/go"
 resolveBuildDependencies.dependsOn ":beam-sdks-go:build"
 
+configurations {
+  dockerDependency
+}
+
 dependencies {
   golang {
     // TODO(herohde): use "./" prefix to prevent gogradle use base github path, for now.
@@ -34,6 +38,14 @@ dependencies {
     build name: './github.com/apache/beam/sdks/go', dir: project(':beam-sdks-go').projectDir
     test name: './github.com/apache/beam/sdks/go', dir: project(':beam-sdks-go').projectDir
   }
+
+  dockerDependency project(path: ":beam-sdks-python", configuration: "distConfig")
+}
+
+task copyDockerfileDependencies(type: Copy) {
+  from configurations.dockerDependency
+  into "build/target"
+  configurations.dockerDependency.stopExecutionIfEmpty()
 }
 
 golang {
@@ -49,5 +61,7 @@ docker {
   name containerImageName(name: "python")
   files "./build/"
 }
+
 // Ensure that making the docker image builds any required artifacts
 dockerPrepare.dependsOn build
+dockerPrepare.dependsOn copyDockerfileDependencies
diff --git a/sdks/python/container/pom.xml b/sdks/python/container/pom.xml
index 2b706826553..05a27b288fb 100644
--- a/sdks/python/container/pom.xml
+++ b/sdks/python/container/pom.xml
@@ -145,6 +145,27 @@
         </executions>
       </plugin>
 
+      <plugin>
+        <artifactId>maven-antrun-plugin</artifactId>
+        <executions>
+          <execution>
+            <id>copy-sdist-archive</id>
+            <phase>package</phase>
+            <goals>
+              <goal>run</goal>
+            </goals>
+            <configuration>
+              <target>
+                <copy todir="target">
+                  <fileset dir="../target" includes="**/apache-beam*.tar.gz"/>
+                  <globmapper from="*" to="apache-beam.tar.gz"/>
+                </copy>
+              </target>
+            </configuration>
+          </execution>
+        </executions>
+      </plugin>
+
       <plugin>
         <groupId>com.spotify</groupId>
         <artifactId>dockerfile-maven-plugin</artifactId>


 

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


Issue Time Tracking
-------------------

    Worklog Id:     (was: 96894)
    Time Spent: 2h 40m  (was: 2.5h)

> Python SDK harness container image contains SDK and dependencies
> ----------------------------------------------------------------
>
>                 Key: BEAM-4131
>                 URL: https://issues.apache.org/jira/browse/BEAM-4131
>             Project: Beam
>          Issue Type: New Feature
>          Components: sdk-py-harness
>            Reporter: Ben Sidhom
>            Assignee: Thomas Weise
>            Priority: Minor
>          Time Spent: 2h 40m
>  Remaining Estimate: 0h
>
> The Docker image for the SDK harness should contain the SDK code and its dependencies so that they do not need to be downloaded from the artifact retrieval service at each boot.
> This is required for operation with portable runners right now because the Python client does not currently stage the SDK itself (as it does with the Dataflow runner).



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)