You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@beam.apache.org by al...@apache.org on 2017/10/10 21:28:37 UTC

[1/2] beam git commit: [BEAM-2600] Add minimal python SDK harness container

Repository: beam
Updated Branches:
  refs/heads/master b0030aeb3 -> bbc231c8e


[BEAM-2600] Add minimal python SDK harness container


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/79c55db9
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/79c55db9
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/79c55db9

Branch: refs/heads/master
Commit: 79c55db9dc1f3f961f5826f8e585758b26dc162b
Parents: b0030ae
Author: Henning Rohde <he...@google.com>
Authored: Mon Oct 9 15:28:25 2017 -0700
Committer: Ahmet Altay <al...@google.com>
Committed: Tue Oct 10 14:28:27 2017 -0700

----------------------------------------------------------------------
 ...ob_beam_PreCommit_Python_MavenInstall.groovy |   2 +-
 sdks/go/cmd/beamctl/cmd/root.go                 |   1 +
 sdks/python/container/Dockerfile                |  27 ++++
 sdks/python/container/boot.go                   | 122 +++++++++++++++
 sdks/python/container/pom.xml                   | 154 +++++++++++++++++++
 sdks/python/pom.xml                             |   7 +
 6 files changed, 312 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/beam/blob/79c55db9/.test-infra/jenkins/job_beam_PreCommit_Python_MavenInstall.groovy
----------------------------------------------------------------------
diff --git a/.test-infra/jenkins/job_beam_PreCommit_Python_MavenInstall.groovy b/.test-infra/jenkins/job_beam_PreCommit_Python_MavenInstall.groovy
index eae129b..feadb89 100644
--- a/.test-infra/jenkins/job_beam_PreCommit_Python_MavenInstall.groovy
+++ b/.test-infra/jenkins/job_beam_PreCommit_Python_MavenInstall.groovy
@@ -45,7 +45,7 @@ mavenJob('beam_PreCommit_Python_MavenInstall') {
     --batch-mode \
     --errors \
     --activate-profiles release,jenkins-precommit,direct-runner,dataflow-runner,spark-runner,flink-runner,apex-runner \
-    --projects sdks/python \
+    --projects sdks/python,!sdks/python/container \
     --also-make \
     --also-make-dependents \
     -D pullRequest=$ghprbPullId \

http://git-wip-us.apache.org/repos/asf/beam/blob/79c55db9/sdks/go/cmd/beamctl/cmd/root.go
----------------------------------------------------------------------
diff --git a/sdks/go/cmd/beamctl/cmd/root.go b/sdks/go/cmd/beamctl/cmd/root.go
index 53ee83c..a4e7945 100644
--- a/sdks/go/cmd/beamctl/cmd/root.go
+++ b/sdks/go/cmd/beamctl/cmd/root.go
@@ -27,6 +27,7 @@ import (
 )
 
 var (
+	// RootCmd is the root for beamctl commands.
 	RootCmd = &cobra.Command{
 		Use:   "beamctl",
 		Short: "Apache Beam command line client",

http://git-wip-us.apache.org/repos/asf/beam/blob/79c55db9/sdks/python/container/Dockerfile
----------------------------------------------------------------------
diff --git a/sdks/python/container/Dockerfile b/sdks/python/container/Dockerfile
new file mode 100644
index 0000000..826e36c
--- /dev/null
+++ b/sdks/python/container/Dockerfile
@@ -0,0 +1,27 @@
+###############################################################################
+#  Licensed to the Apache Software Foundation (ASF) under one
+#  or more contributor license agreements.  See the NOTICE file
+#  distributed with this work for additional information
+#  regarding copyright ownership.  The ASF licenses this file
+#  to you under the Apache License, Version 2.0 (the
+#  "License"); you may not use this file except in compliance
+#  with the License.  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+# limitations under the License.
+###############################################################################
+
+FROM python:2
+MAINTAINER "Apache Beam <de...@beam.apache.org>"
+
+# TODO(herohde): preinstall various packages for better startup
+# performance and reliability.
+
+ADD target/linux_amd64/boot /opt/apache/beam/
+
+ENTRYPOINT ["/opt/apache/beam/boot"]

http://git-wip-us.apache.org/repos/asf/beam/blob/79c55db9/sdks/python/container/boot.go
----------------------------------------------------------------------
diff --git a/sdks/python/container/boot.go b/sdks/python/container/boot.go
new file mode 100644
index 0000000..18b9900
--- /dev/null
+++ b/sdks/python/container/boot.go
@@ -0,0 +1,122 @@
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements.  See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License.  You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// boot is the boot code for the Python SDK harness container. It is responsible
+// for retrieving and installing staged files and invoking python correctly.
+package main
+
+import (
+"context"
+"flag"
+"fmt"
+"log"
+"os"
+"path/filepath"
+"strings"
+
+"github.com/apache/beam/sdks/go/pkg/beam/artifact"
+"github.com/apache/beam/sdks/go/pkg/beam/provision"
+"github.com/apache/beam/sdks/go/pkg/beam/util/execx"
+"github.com/apache/beam/sdks/go/pkg/beam/util/grpcx"
+)
+
+var (
+	// Contract: https://s.apache.org/beam-fn-api-container-contract.
+
+	id                = flag.String("id", "", "Local identifier (required).")
+	loggingEndpoint   = flag.String("logging_endpoint", "", "Logging endpoint (required).")
+	artifactEndpoint  = flag.String("artifact_endpoint", "", "Artifact endpoint (required).")
+	provisionEndpoint = flag.String("provision_endpoint", "", "Provision endpoint (required).")
+	controlEndpoint   = flag.String("control_endpoint", "", "Control endpoint (required).")
+	semiPersistDir    = flag.String("semi_persist_dir", "/tmp", "Local semi-persistent directory (optional).")
+)
+
+func main() {
+	flag.Parse()
+	if *id == "" {
+		log.Fatal("No id provided.")
+	}
+	if *loggingEndpoint == "" {
+		log.Fatal("No logging endpoint provided.")
+	}
+	if *artifactEndpoint == "" {
+		log.Fatal("No artifact endpoint provided.")
+	}
+	if *provisionEndpoint == "" {
+		log.Fatal("No provision endpoint provided.")
+	}
+	if *controlEndpoint == "" {
+		log.Fatal("No control endpoint provided.")
+	}
+
+	log.Printf("Initializing python harness: %v", strings.Join(os.Args, " "))
+
+	ctx := grpcx.WriteWorkerID(context.Background(), *id)
+
+	// (1) Obtain the pipeline options
+
+	info, err := provision.Info(ctx, *provisionEndpoint)
+	if err != nil {
+		log.Fatalf("Failed to obtain provisioning information: %v", err)
+	}
+	options, err := provision.ProtoToJSON(info.GetPipelineOptions())
+	if err != nil {
+		log.Fatalf("Failed to convert pipeline options: %v", err)
+	}
+
+	// (2) Retrieve and install the staged packages.
+
+	dir := filepath.Join(*semiPersistDir, "staged")
+
+	_, err = artifact.Materialize(ctx, *artifactEndpoint, dir)
+	if err != nil {
+		log.Fatalf("Failed to retrieve staged files: %v", err)
+	}
+
+	// TODO(herohde): the packages to install should be specified explicitly. It
+	// would also be possible to install the SDK in the Dockerfile.
+	if err := pipInstall(joinPaths(dir, "dataflow_python_sdk.tar[gcp]")); err != nil {
+		log.Fatalf("Failed to install SDK: %v", err)
+	}
+
+	// (3) Invoke python
+
+	os.Setenv("PIPELINE_OPTIONS", options)
+	os.Setenv("LOGGING_API_SERVICE_DESCRIPTOR", fmt.Sprintf("url: \"%v\"\n", *loggingEndpoint))
+	os.Setenv("CONTROL_API_SERVICE_DESCRIPTOR", fmt.Sprintf("url: \"%v\"\n", *controlEndpoint))
+
+	args := []string{
+		"-m",
+		"apache_beam.runners.worker.sdk_worker_main",
+	}
+	log.Printf("Executing: python %v", strings.Join(args, " "))
+
+	log.Fatalf("Python exited: %v", execx.Execute("python", args...))
+}
+
+// pipInstall runs pip install with the given args.
+func pipInstall(args []string) error {
+	return execx.Execute("pip", append([]string{"install"}, args...)...)
+}
+
+// joinPaths joins the dir to every artifact path. Each / in the path is
+// interpreted as a directory separator.
+func joinPaths(dir string, paths ...string) []string {
+	var ret []string
+	for _, p := range paths {
+		ret = append(ret, filepath.Join(dir, filepath.FromSlash(p)))
+	}
+	return ret
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/beam/blob/79c55db9/sdks/python/container/pom.xml
----------------------------------------------------------------------
diff --git a/sdks/python/container/pom.xml b/sdks/python/container/pom.xml
new file mode 100644
index 0000000..45b8cbf
--- /dev/null
+++ b/sdks/python/container/pom.xml
@@ -0,0 +1,154 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+    Licensed to the Apache Software Foundation (ASF) under one or more
+    contributor license agreements.  See the NOTICE file distributed with
+    this work for additional information regarding copyright ownership.
+    The ASF licenses this file to You under the Apache License, Version 2.0
+    (the "License"); you may not use this file except in compliance with
+    the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+-->
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+  <modelVersion>4.0.0</modelVersion>
+
+  <parent>
+    <groupId>org.apache.beam</groupId>
+    <artifactId>beam-sdks-python</artifactId>
+    <version>2.3.0-SNAPSHOT</version>
+    <relativePath>../pom.xml</relativePath>
+  </parent>
+
+  <artifactId>beam-sdks-python-container</artifactId>
+
+  <packaging>pom</packaging>
+
+  <name>Apache Beam :: SDKs :: Python :: Container</name>
+
+  <properties>
+    <!-- Add full path directory structure for 'go get' compatibility -->
+    <go.source.base>${project.basedir}/target/src</go.source.base>
+    <go.source.dir>${go.source.base}/github.com/apache/beam/sdks/go</go.source.dir>
+  </properties>
+
+  <build>
+    <sourceDirectory>${go.source.base}</sourceDirectory>
+    <plugins>
+      <plugin>
+        <artifactId>maven-resources-plugin</artifactId>
+        <executions>
+          <execution>
+            <id>copy-go-cmd-source</id>
+            <phase>generate-sources</phase>
+            <goals>
+              <goal>copy-resources</goal>
+            </goals>
+            <configuration>
+              <outputDirectory>${go.source.base}/github.com/apache/beam/cmd/boot</outputDirectory>
+              <resources>
+                <resource>
+                  <directory>.</directory>
+                  <includes>
+                    <include>*.go</include>
+                  </includes>
+                  <filtering>false</filtering>
+                </resource>
+              </resources>
+            </configuration>
+          </execution>
+        </executions>
+      </plugin>
+
+      <!-- CAVEAT: for latest shared files, run mvn install in sdks/go -->
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-dependency-plugin</artifactId>
+        <executions>
+          <execution>
+            <id>copy-dependency</id>
+            <phase>generate-sources</phase>
+            <goals>
+              <goal>unpack</goal>
+            </goals>
+            <configuration>
+              <artifactItems>
+                <artifactItem>
+                  <groupId>org.apache.beam</groupId>
+                  <artifactId>beam-sdks-go</artifactId>
+                  <version>${project.version}</version>
+                  <type>zip</type>
+                  <classifier>pkg-sources</classifier>
+                  <overWrite>true</overWrite>
+                  <outputDirectory>${go.source.dir}</outputDirectory>
+                </artifactItem>
+              </artifactItems>
+            </configuration>
+          </execution>
+        </executions>
+      </plugin>
+
+      <plugin>
+        <groupId>com.igormaznitsa</groupId>
+        <artifactId>mvn-golang-wrapper</artifactId>
+        <executions>
+          <execution>
+            <id>go-get-imports</id>
+            <goals>
+              <goal>get</goal>
+            </goals>
+            <phase>compile</phase>
+            <configuration>
+              <packages>
+                <package>google.golang.org/grpc</package>
+                <package>golang.org/x/oauth2/google</package>
+                <package>google.golang.org/api/storage/v1</package>
+              </packages>
+            </configuration>
+          </execution>
+          <execution>
+            <id>go-build</id>
+            <goals>
+              <goal>build</goal>
+            </goals>
+            <phase>compile</phase>
+            <configuration>
+              <packages>
+                <package>github.com/apache/beam/cmd/boot</package>
+              </packages>
+              <resultName>boot</resultName>
+            </configuration>
+          </execution>
+          <execution>
+            <id>go-build-linux-amd64</id>
+            <goals>
+              <goal>build</goal>
+            </goals>
+            <phase>compile</phase>
+            <configuration>
+              <packages>
+                <package>github.com/apache/beam/cmd/boot</package>
+              </packages>
+              <resultName>linux_amd64/boot</resultName>
+              <targetArch>amd64</targetArch>
+              <targetOs>linux</targetOs>
+            </configuration>
+          </execution>
+        </executions>
+      </plugin>
+
+      <plugin>
+        <groupId>com.spotify</groupId>
+        <artifactId>dockerfile-maven-plugin</artifactId>
+        <configuration>
+          <repository>${docker-repository-root}/python</repository>
+        </configuration>
+      </plugin>
+    </plugins>
+  </build>
+</project>

http://git-wip-us.apache.org/repos/asf/beam/blob/79c55db9/sdks/python/pom.xml
----------------------------------------------------------------------
diff --git a/sdks/python/pom.xml b/sdks/python/pom.xml
index 624048f..62135e8 100644
--- a/sdks/python/pom.xml
+++ b/sdks/python/pom.xml
@@ -32,6 +32,10 @@
 
   <name>Apache Beam :: SDKs :: Python</name>
 
+  <modules>
+    <module>container</module>
+  </modules>
+
   <properties>
     <!-- python.interpreter.bin & python.pip.bin
          is set dynamically by findSupportedPython.groovy -->
@@ -59,6 +63,7 @@
         <groupId>org.codehaus.gmaven</groupId>
         <artifactId>groovy-maven-plugin</artifactId>
         <version>${groovy-maven-plugin.version}</version>
+        <inherited>false</inherited>
         <executions>
           <execution>
             <id>find-supported-python-for-clean</id>
@@ -85,6 +90,7 @@
       <plugin>
         <groupId>org.codehaus.mojo</groupId>
         <artifactId>exec-maven-plugin</artifactId>
+        <inherited>false</inherited>
         <executions>
           <execution>
             <id>setuptools-clean</id>
@@ -189,6 +195,7 @@
           <plugin>
             <groupId>org.codehaus.mojo</groupId>
             <artifactId>exec-maven-plugin</artifactId>
+            <inherited>false</inherited>
             <executions>
               <execution>
                 <id>setuptools-test</id>


[2/2] beam git commit: This closes #3965

Posted by al...@apache.org.
This closes #3965


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/bbc231c8
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/bbc231c8
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/bbc231c8

Branch: refs/heads/master
Commit: bbc231c8eb8ee66cd5fdca74d6aec76dec5e4aa0
Parents: b0030ae 79c55db
Author: Ahmet Altay <al...@google.com>
Authored: Tue Oct 10 14:28:29 2017 -0700
Committer: Ahmet Altay <al...@google.com>
Committed: Tue Oct 10 14:28:29 2017 -0700

----------------------------------------------------------------------
 ...ob_beam_PreCommit_Python_MavenInstall.groovy |   2 +-
 sdks/go/cmd/beamctl/cmd/root.go                 |   1 +
 sdks/python/container/Dockerfile                |  27 ++++
 sdks/python/container/boot.go                   | 122 +++++++++++++++
 sdks/python/container/pom.xml                   | 154 +++++++++++++++++++
 sdks/python/pom.xml                             |   7 +
 6 files changed, 312 insertions(+), 1 deletion(-)
----------------------------------------------------------------------