You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@beam.apache.org by al...@apache.org on 2017/10/10 21:28:37 UTC
[1/2] beam git commit: [BEAM-2600] Add minimal python SDK harness
container
Repository: beam
Updated Branches:
refs/heads/master b0030aeb3 -> bbc231c8e
[BEAM-2600] Add minimal python SDK harness container
Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/79c55db9
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/79c55db9
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/79c55db9
Branch: refs/heads/master
Commit: 79c55db9dc1f3f961f5826f8e585758b26dc162b
Parents: b0030ae
Author: Henning Rohde <he...@google.com>
Authored: Mon Oct 9 15:28:25 2017 -0700
Committer: Ahmet Altay <al...@google.com>
Committed: Tue Oct 10 14:28:27 2017 -0700
----------------------------------------------------------------------
...ob_beam_PreCommit_Python_MavenInstall.groovy | 2 +-
sdks/go/cmd/beamctl/cmd/root.go | 1 +
sdks/python/container/Dockerfile | 27 ++++
sdks/python/container/boot.go | 122 +++++++++++++++
sdks/python/container/pom.xml | 154 +++++++++++++++++++
sdks/python/pom.xml | 7 +
6 files changed, 312 insertions(+), 1 deletion(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/beam/blob/79c55db9/.test-infra/jenkins/job_beam_PreCommit_Python_MavenInstall.groovy
----------------------------------------------------------------------
diff --git a/.test-infra/jenkins/job_beam_PreCommit_Python_MavenInstall.groovy b/.test-infra/jenkins/job_beam_PreCommit_Python_MavenInstall.groovy
index eae129b..feadb89 100644
--- a/.test-infra/jenkins/job_beam_PreCommit_Python_MavenInstall.groovy
+++ b/.test-infra/jenkins/job_beam_PreCommit_Python_MavenInstall.groovy
@@ -45,7 +45,7 @@ mavenJob('beam_PreCommit_Python_MavenInstall') {
--batch-mode \
--errors \
--activate-profiles release,jenkins-precommit,direct-runner,dataflow-runner,spark-runner,flink-runner,apex-runner \
- --projects sdks/python \
+ --projects sdks/python,!sdks/python/container \
--also-make \
--also-make-dependents \
-D pullRequest=$ghprbPullId \
http://git-wip-us.apache.org/repos/asf/beam/blob/79c55db9/sdks/go/cmd/beamctl/cmd/root.go
----------------------------------------------------------------------
diff --git a/sdks/go/cmd/beamctl/cmd/root.go b/sdks/go/cmd/beamctl/cmd/root.go
index 53ee83c..a4e7945 100644
--- a/sdks/go/cmd/beamctl/cmd/root.go
+++ b/sdks/go/cmd/beamctl/cmd/root.go
@@ -27,6 +27,7 @@ import (
)
var (
+ // RootCmd is the root for beamctl commands.
RootCmd = &cobra.Command{
Use: "beamctl",
Short: "Apache Beam command line client",
http://git-wip-us.apache.org/repos/asf/beam/blob/79c55db9/sdks/python/container/Dockerfile
----------------------------------------------------------------------
diff --git a/sdks/python/container/Dockerfile b/sdks/python/container/Dockerfile
new file mode 100644
index 0000000..826e36c
--- /dev/null
+++ b/sdks/python/container/Dockerfile
@@ -0,0 +1,27 @@
+###############################################################################
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+###############################################################################
+
+FROM python:2
+MAINTAINER "Apache Beam <de...@beam.apache.org>"
+
+# TODO(herohde): preinstall various packages for better startup
+# performance and reliability.
+
+ADD target/linux_amd64/boot /opt/apache/beam/
+
+ENTRYPOINT ["/opt/apache/beam/boot"]
http://git-wip-us.apache.org/repos/asf/beam/blob/79c55db9/sdks/python/container/boot.go
----------------------------------------------------------------------
diff --git a/sdks/python/container/boot.go b/sdks/python/container/boot.go
new file mode 100644
index 0000000..18b9900
--- /dev/null
+++ b/sdks/python/container/boot.go
@@ -0,0 +1,122 @@
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// boot is the boot code for the Python SDK harness container. It is responsible
+// for retrieving and installing staged files and invoking python correctly.
+package main
+
+import (
+"context"
+"flag"
+"fmt"
+"log"
+"os"
+"path/filepath"
+"strings"
+
+"github.com/apache/beam/sdks/go/pkg/beam/artifact"
+"github.com/apache/beam/sdks/go/pkg/beam/provision"
+"github.com/apache/beam/sdks/go/pkg/beam/util/execx"
+"github.com/apache/beam/sdks/go/pkg/beam/util/grpcx"
+)
+
+var (
+ // Command-line flags for the container entry point. main terminates with a
+ // fatal log message when id or any of the four endpoint flags is empty;
+ // semi_persist_dir is the only flag with a non-empty default ("/tmp").
+ // Contract: https://s.apache.org/beam-fn-api-container-contract.
+
+ id = flag.String("id", "", "Local identifier (required).")
+ loggingEndpoint = flag.String("logging_endpoint", "", "Logging endpoint (required).")
+ artifactEndpoint = flag.String("artifact_endpoint", "", "Artifact endpoint (required).")
+ provisionEndpoint = flag.String("provision_endpoint", "", "Provision endpoint (required).")
+ controlEndpoint = flag.String("control_endpoint", "", "Control endpoint (required).")
+ semiPersistDir = flag.String("semi_persist_dir", "/tmp", "Local semi-persistent directory (optional).")
+)
+
+// main boots the Python SDK harness. It validates the required flags, then:
+// (1) obtains the pipeline options from the provision endpoint,
+// (2) retrieves the staged files and pip-installs the SDK tarball, and
+// (3) runs the python worker module with the options and endpoints exported
+// through environment variables.
+// Every failure is reported with log.Fatal/log.Fatalf, which ends the process.
+func main() {
+ flag.Parse()
+ // Reject missing required flags up front; each check exits on failure.
+ if *id == "" {
+ log.Fatal("No id provided.")
+ }
+ if *loggingEndpoint == "" {
+ log.Fatal("No logging endpoint provided.")
+ }
+ if *artifactEndpoint == "" {
+ log.Fatal("No artifact endpoint provided.")
+ }
+ if *provisionEndpoint == "" {
+ log.Fatal("No provision endpoint provided.")
+ }
+ if *controlEndpoint == "" {
+ log.Fatal("No control endpoint provided.")
+ }
+
+ log.Printf("Initializing python harness: %v", strings.Join(os.Args, " "))
+
+ // The context used for the provision and artifact calls below is derived
+ // from the worker id.
+ // NOTE(review): presumably WriteWorkerID attaches the id as outgoing gRPC
+ // metadata -- confirm in the grpcx package.
+ ctx := grpcx.WriteWorkerID(context.Background(), *id)
+
+ // (1) Obtain the pipeline options
+
+ info, err := provision.Info(ctx, *provisionEndpoint)
+ if err != nil {
+ log.Fatalf("Failed to obtain provisioning information: %v", err)
+ }
+ // options is later exported verbatim as the PIPELINE_OPTIONS variable.
+ options, err := provision.ProtoToJSON(info.GetPipelineOptions())
+ if err != nil {
+ log.Fatalf("Failed to convert pipeline options: %v", err)
+ }
+
+ // (2) Retrieve and install the staged packages.
+
+ // Staged files are placed under <semi_persist_dir>/staged.
+ dir := filepath.Join(*semiPersistDir, "staged")
+
+ // The first return value of Materialize is discarded; only the error is
+ // inspected.
+ _, err = artifact.Materialize(ctx, *artifactEndpoint, dir)
+ if err != nil {
+ log.Fatalf("Failed to retrieve staged files: %v", err)
+ }
+
+ // TODO(herohde): the packages to install should be specified explicitly. It
+ // would also be possible to install the SDK in the Dockerfile.
+ // NOTE(review): the "[gcp]" suffix is presumably pip's extras syntax applied
+ // to the hard-coded SDK tarball name -- confirm against the staging side.
+ if err := pipInstall(joinPaths(dir, "dataflow_python_sdk.tar[gcp]")); err != nil {
+ log.Fatalf("Failed to install SDK: %v", err)
+ }
+
+ // (3) Invoke python
+
+ // The values are passed to the python process through the environment. The
+ // errors returned by os.Setenv are not checked.
+ os.Setenv("PIPELINE_OPTIONS", options)
+ os.Setenv("LOGGING_API_SERVICE_DESCRIPTOR", fmt.Sprintf("url: \"%v\"\n", *loggingEndpoint))
+ os.Setenv("CONTROL_API_SERVICE_DESCRIPTOR", fmt.Sprintf("url: \"%v\"\n", *controlEndpoint))
+
+ args := []string{
+ "-m",
+ "apache_beam.runners.worker.sdk_worker_main",
+ }
+ log.Printf("Executing: python %v", strings.Join(args, " "))
+
+ // Whatever Execute returns (including a nil error) is logged through
+ // log.Fatalf, so once Execute returns the boot process always exits via
+ // log.Fatalf rather than returning from main.
+ log.Fatalf("Python exited: %v", execx.Execute("python", args...))
+}
+
+// pipInstall runs "pip install" followed by the given args via execx.Execute
+// and returns the error from that call unchanged.
+func pipInstall(args []string) error {
+ return execx.Execute("pip", append([]string{"install"}, args...)...)
+}
+
+// joinPaths joins the dir to every artifact path. Each / in the path is
+// interpreted as a directory separator (converted with filepath.FromSlash
+// before joining). The result has one entry per input path, in the same
+// order; it is nil when no paths are given.
+func joinPaths(dir string, paths ...string) []string {
+ var ret []string
+ for _, p := range paths {
+ ret = append(ret, filepath.Join(dir, filepath.FromSlash(p)))
+ }
+ return ret
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/beam/blob/79c55db9/sdks/python/container/pom.xml
----------------------------------------------------------------------
diff --git a/sdks/python/container/pom.xml b/sdks/python/container/pom.xml
new file mode 100644
index 0000000..45b8cbf
--- /dev/null
+++ b/sdks/python/container/pom.xml
@@ -0,0 +1,154 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+ <modelVersion>4.0.0</modelVersion>
+
+ <parent>
+ <groupId>org.apache.beam</groupId>
+ <artifactId>beam-sdks-python</artifactId>
+ <version>2.3.0-SNAPSHOT</version>
+ <relativePath>../pom.xml</relativePath>
+ </parent>
+
+ <artifactId>beam-sdks-python-container</artifactId>
+
+ <packaging>pom</packaging>
+
+ <name>Apache Beam :: SDKs :: Python :: Container</name>
+
+ <properties>
+ <!-- Add full path directory structure for 'go get' compatibility -->
+ <go.source.base>${project.basedir}/target/src</go.source.base>
+ <go.source.dir>${go.source.base}/github.com/apache/beam/sdks/go</go.source.dir>
+ </properties>
+
+ <build>
+ <sourceDirectory>${go.source.base}</sourceDirectory>
+ <plugins>
+ <plugin>
+ <artifactId>maven-resources-plugin</artifactId>
+ <executions>
+ <execution>
+ <id>copy-go-cmd-source</id>
+ <phase>generate-sources</phase>
+ <goals>
+ <goal>copy-resources</goal>
+ </goals>
+ <configuration>
+ <outputDirectory>${go.source.base}/github.com/apache/beam/cmd/boot</outputDirectory>
+ <resources>
+ <resource>
+ <directory>.</directory>
+ <includes>
+ <include>*.go</include>
+ </includes>
+ <filtering>false</filtering>
+ </resource>
+ </resources>
+ </configuration>
+ </execution>
+ </executions>
+ </plugin>
+
+ <!-- CAVEAT: for latest shared files, run mvn install in sdks/go -->
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-dependency-plugin</artifactId>
+ <executions>
+ <execution>
+ <id>copy-dependency</id>
+ <phase>generate-sources</phase>
+ <goals>
+ <goal>unpack</goal>
+ </goals>
+ <configuration>
+ <artifactItems>
+ <artifactItem>
+ <groupId>org.apache.beam</groupId>
+ <artifactId>beam-sdks-go</artifactId>
+ <version>${project.version}</version>
+ <type>zip</type>
+ <classifier>pkg-sources</classifier>
+ <overWrite>true</overWrite>
+ <outputDirectory>${go.source.dir}</outputDirectory>
+ </artifactItem>
+ </artifactItems>
+ </configuration>
+ </execution>
+ </executions>
+ </plugin>
+
+ <plugin>
+ <groupId>com.igormaznitsa</groupId>
+ <artifactId>mvn-golang-wrapper</artifactId>
+ <executions>
+ <execution>
+ <id>go-get-imports</id>
+ <goals>
+ <goal>get</goal>
+ </goals>
+ <phase>compile</phase>
+ <configuration>
+ <packages>
+ <package>google.golang.org/grpc</package>
+ <package>golang.org/x/oauth2/google</package>
+ <package>google.golang.org/api/storage/v1</package>
+ </packages>
+ </configuration>
+ </execution>
+ <execution>
+ <id>go-build</id>
+ <goals>
+ <goal>build</goal>
+ </goals>
+ <phase>compile</phase>
+ <configuration>
+ <packages>
+ <package>github.com/apache/beam/cmd/boot</package>
+ </packages>
+ <resultName>boot</resultName>
+ </configuration>
+ </execution>
+ <execution>
+ <id>go-build-linux-amd64</id>
+ <goals>
+ <goal>build</goal>
+ </goals>
+ <phase>compile</phase>
+ <configuration>
+ <packages>
+ <package>github.com/apache/beam/cmd/boot</package>
+ </packages>
+ <resultName>linux_amd64/boot</resultName>
+ <targetArch>amd64</targetArch>
+ <targetOs>linux</targetOs>
+ </configuration>
+ </execution>
+ </executions>
+ </plugin>
+
+ <plugin>
+ <groupId>com.spotify</groupId>
+ <artifactId>dockerfile-maven-plugin</artifactId>
+ <configuration>
+ <repository>${docker-repository-root}/python</repository>
+ </configuration>
+ </plugin>
+ </plugins>
+ </build>
+</project>
http://git-wip-us.apache.org/repos/asf/beam/blob/79c55db9/sdks/python/pom.xml
----------------------------------------------------------------------
diff --git a/sdks/python/pom.xml b/sdks/python/pom.xml
index 624048f..62135e8 100644
--- a/sdks/python/pom.xml
+++ b/sdks/python/pom.xml
@@ -32,6 +32,10 @@
<name>Apache Beam :: SDKs :: Python</name>
+ <modules>
+ <module>container</module>
+ </modules>
+
<properties>
<!-- python.interpreter.bin & python.pip.bin
is set dynamically by findSupportedPython.groovy -->
@@ -59,6 +63,7 @@
<groupId>org.codehaus.gmaven</groupId>
<artifactId>groovy-maven-plugin</artifactId>
<version>${groovy-maven-plugin.version}</version>
+ <inherited>false</inherited>
<executions>
<execution>
<id>find-supported-python-for-clean</id>
@@ -85,6 +90,7 @@
<plugin>
<groupId>org.codehaus.mojo</groupId>
<artifactId>exec-maven-plugin</artifactId>
+ <inherited>false</inherited>
<executions>
<execution>
<id>setuptools-clean</id>
@@ -189,6 +195,7 @@
<plugin>
<groupId>org.codehaus.mojo</groupId>
<artifactId>exec-maven-plugin</artifactId>
+ <inherited>false</inherited>
<executions>
<execution>
<id>setuptools-test</id>
[2/2] beam git commit: This closes #3965
Posted by al...@apache.org.
This closes #3965
Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/bbc231c8
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/bbc231c8
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/bbc231c8
Branch: refs/heads/master
Commit: bbc231c8eb8ee66cd5fdca74d6aec76dec5e4aa0
Parents: b0030ae 79c55db
Author: Ahmet Altay <al...@google.com>
Authored: Tue Oct 10 14:28:29 2017 -0700
Committer: Ahmet Altay <al...@google.com>
Committed: Tue Oct 10 14:28:29 2017 -0700
----------------------------------------------------------------------
...ob_beam_PreCommit_Python_MavenInstall.groovy | 2 +-
sdks/go/cmd/beamctl/cmd/root.go | 1 +
sdks/python/container/Dockerfile | 27 ++++
sdks/python/container/boot.go | 122 +++++++++++++++
sdks/python/container/pom.xml | 154 +++++++++++++++++++
sdks/python/pom.xml | 7 +
6 files changed, 312 insertions(+), 1 deletion(-)
----------------------------------------------------------------------