You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@beam.apache.org by bh...@apache.org on 2020/05/29 17:08:46 UTC

[beam] branch release-2.22.0 updated: [BEAM-10078] uniquify Dataflow specific jars when staging (#11814) (#11859)

This is an automated email from the ASF dual-hosted git repository.

bhulette pushed a commit to branch release-2.22.0
in repository https://gitbox.apache.org/repos/asf/beam.git


The following commit(s) were added to refs/heads/release-2.22.0 by this push:
     new 0513545  [BEAM-10078] uniquify Dataflow specific jars when staging (#11814) (#11859)
0513545 is described below

commit 051354576889cba96337cae34bf438408a5ff6e2
Author: Brian Hulette <bh...@google.com>
AuthorDate: Fri May 29 10:08:32 2020 -0700

    [BEAM-10078] uniquify Dataflow specific jars when staging (#11814) (#11859)
    
    * [BEAM-10078] uniquify Dataflow specific jars when staging
    
    * add comments and logging
    
    Co-authored-by: Heejong Lee <he...@gmail.com>
---
 .../beam/runners/dataflow/util/PackageUtil.java       | 19 ++++++++++++++++++-
 1 file changed, 18 insertions(+), 1 deletion(-)

diff --git a/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/util/PackageUtil.java b/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/util/PackageUtil.java
index 7213993..036d2db 100644
--- a/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/util/PackageUtil.java
+++ b/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/util/PackageUtil.java
@@ -397,10 +397,27 @@ public class PackageUtil implements Closeable {
             String.format("Non-existent file to stage: %s", file.getAbsolutePath()));
       }
       checkState(!file.isDirectory(), "Source file must not be a directory.");
+      String target;
+      // Dataflow worker jar and windmill binary can be overridden by providing files with
+      // predefined file names. Normally, the artifact file name can be the same as the
+      // last component of the GCS object resource path. However, these predefined names
+      // need special handling, since the staged files must also be unique even within
+      // the same staging directory.
+      switch (dest) {
+        case "dataflow-worker.jar":
+        case "windmill_main":
+          target =
+              Environments.createStagingFileName(
+                  file, Files.asByteSource(file).hash(Hashing.sha256()));
+          LOG.info("Staging custom {} as {}", dest, target);
+          break;
+        default:
+          target = dest;
+      }
       DataflowPackage destination = new DataflowPackage();
       String resourcePath =
           FileSystems.matchNewResource(stagingPath, true)
-              .resolve(dest, StandardResolveOptions.RESOLVE_FILE)
+              .resolve(target, StandardResolveOptions.RESOLVE_FILE)
               .toString();
       destination.setLocation(resourcePath);
       destination.setName(dest);