Posted to commits@beam.apache.org by rm...@apache.org on 2016/06/21 08:37:27 UTC

[2/2] incubator-beam git commit: better comments for win workaround and basic sanity checks for winutils.exe

better comments for win workaround and basic sanity checks for winutils.exe


Project: http://git-wip-us.apache.org/repos/asf/incubator-beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-beam/commit/460d21cb
Tree: http://git-wip-us.apache.org/repos/asf/incubator-beam/tree/460d21cb
Diff: http://git-wip-us.apache.org/repos/asf/incubator-beam/diff/460d21cb

Branch: refs/heads/BEAM-357_windows-build-fails
Commit: 460d21cb7070603f789da9d13e12668194c91e9b
Parents: 4188330
Author: Romain manni-Bucau <rm...@gmail.com>
Authored: Tue Jun 21 10:37:05 2016 +0200
Committer: Romain manni-Bucau <rm...@gmail.com>
Committed: Tue Jun 21 10:37:05 2016 +0200

----------------------------------------------------------------------
 .../beam/runners/flink/WriteSinkITCase.java     |   2 +-
 .../beam/sdk/testing/HadoopWorkarounds.java     | 109 +++++++++++++++++--
 sdks/java/io/hdfs/pom.xml                       |   9 --
 sdks/java/maven-archetypes/starter/pom.xml      |   6 +-
 4 files changed, 104 insertions(+), 22 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/460d21cb/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/WriteSinkITCase.java
----------------------------------------------------------------------
diff --git a/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/WriteSinkITCase.java b/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/WriteSinkITCase.java
index 1a56350..bb3778d 100644
--- a/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/WriteSinkITCase.java
+++ b/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/WriteSinkITCase.java
@@ -54,7 +54,7 @@ public class WriteSinkITCase extends JavaProgramTestBase {
 
   @Override
   protected void preSubmit() throws Exception {
-    resultPath = getTempDirPath("result");
+    resultPath = getTempDirPath("result-" + System.nanoTime());
   }
 
   @Override
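
The change above appends System.nanoTime() to the result directory name so every run gets a fresh path, presumably so that a result directory left over from a previous run (for example one Windows could not delete) does not break the test. A minimal sketch of the same idea with plain java.nio, assuming no access to Flink's getTempDirPath helper; the "result-" prefix and class name are only illustrative:

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;

public class UniqueResultDir {
    public static void main(final String[] args) throws IOException {
        // createTempDirectory appends its own unique suffix, so repeated or
        // concurrent runs never collide on the same folder
        final Path resultDir = Files.createTempDirectory("result-");
        System.out.println("writing results to " + resultDir);
    }
}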

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/460d21cb/sdks/java/core/src/main/java/org/apache/beam/sdk/testing/HadoopWorkarounds.java
----------------------------------------------------------------------
diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/testing/HadoopWorkarounds.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/testing/HadoopWorkarounds.java
index ee2e135..1c2aa20 100644
--- a/sdks/java/core/src/main/java/org/apache/beam/sdk/testing/HadoopWorkarounds.java
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/testing/HadoopWorkarounds.java
@@ -17,6 +17,8 @@
  */
 package org.apache.beam.sdk.testing;
 
+import static java.util.Arrays.asList;
+
 import org.apache.commons.compress.utils.IOUtils;
 
 import java.io.File;
@@ -26,15 +28,21 @@ import java.io.InputStream;
 import java.io.OutputStream;
 import java.net.MalformedURLException;
 import java.net.URL;
+import java.nio.file.Files;
+import java.util.Arrays;
 
 /**
 * A simple class ensuring winutils.exe can be found by the JVM.
+ * <p>
+ * See http://wiki.apache.org/hadoop/WindowsProblems for details.
+ * <p>
+ * Note: don't forget to add the org.bouncycastle:bcpg-jdk16 dependency to use it.
  */
 public class HadoopWorkarounds {
     /**
     * In practice this method only needs to be called once per JVM
     * since Hadoop uses static variables to store it.
-     *
+     * <p>
     * Note: ensure the invocation happens before Hadoop reads it
     * and that this folder survives the tests
     * (avoid temporary folders since tests can share them).
@@ -51,6 +59,8 @@ public class HadoopWorkarounds {
         // Hadoop doesn't ship winutils.exe (see https://issues.apache.org/jira/browse/HADOOP-10051)
         // so use this GitHub repo temporarily, then just use the main tar.gz
         /*
+        // note: this commented code requires the commons-compress dependency (to be added if we use it)
+
         String hadoopVersion = VersionInfo.getVersion();
         final URL url = new URL("https://archive.apache.org/dist/hadoop/common/
                   hadoop-" + hadoopVersion + "/hadoop-" + hadoopVersion + ".tar.gz");
@@ -97,19 +107,49 @@ public class HadoopWorkarounds {
                 + "-Dhadoop.home.dir so we'll download winutils.exe");
 
         new File(hadoopHome, "bin").mkdirs();
-        final URL url;
-        try {
-            url = new URL("https://github.com/steveloughran/winutils/"
-                    + "raw/master/hadoop-2.7.1/bin/winutils.exe");
-        } catch (final MalformedURLException e) { // unlikely
-            throw new IllegalArgumentException(e);
+        final File winutils = new File(hadoopHome, "bin/winutils.exe");
+
+        for (final String suffix : asList("", ".asc")) {
+            final URL url;
+            try {
+                // this is not a random URL - see HADOOP-10051
+                // it is provided and signed with an ASF gpg key.
+
+                // note: 2.6.3 because 2.6.4 and 2.7.1 don't have a .asc file
+                url = new URL("https://github.com/steveloughran/winutils/"
+                        + "raw/master/hadoop-2.6.3/bin/winutils.exe" + suffix);
+            } catch (final MalformedURLException e) { // unlikely
+                throw new IllegalArgumentException(e);
+            }
+
+            // download winutils.exe
+            try {
+                try (final InputStream is = url.openStream();
+                     final OutputStream os = new FileOutputStream(
+                             new File(hadoopHome, "bin/winutils.exe" + suffix))) {
+                    try {
+                        IOUtils.copy(is, os, 1024 * 1024);
+                    } catch (final IOException e) {
+                        throw new IllegalStateException(e);
+                    }
+                }
+            } catch (final IOException e) {
+                throw new IllegalStateException(e);
+            }
         }
+
+        // get the gpg key which is supposed to have signed the winutils.exe
+        final File gpg = new File(hadoopHome, "bin/gpg");
         try {
-            try (final InputStream is = url.openStream();
-                 final OutputStream os = new FileOutputStream(
-                         new File(hadoopHome, "bin/winutils.exe"))) {
+            /*
+            the key is https://github.com/steveloughran/winutils/blob/master/KEYS
+            but we trust the ASF, not GitHub, so use the one we trust.
+             */
+            final URL gpgUrl = new URL("http://home.apache.org/keys/committer/stevel");
+            try (final InputStream is = gpgUrl.openStream();
+                 final OutputStream os = new FileOutputStream(gpg)) {
                 try {
-                    IOUtils.copy(is, os, 1024 * 1024);
+                    IOUtils.copy(is, os);
                 } catch (final IOException e) {
                     throw new IllegalStateException(e);
                 }
@@ -117,9 +157,56 @@ public class HadoopWorkarounds {
         } catch (final IOException e) {
             throw new IllegalStateException(e);
         }
+
+        final File ascFile = new File(winutils.getParentFile(), winutils.getName() + ".asc");
+        try {
+            sanityCheck(winutils, ascFile, gpg);
+        } catch (IOException e) {
+            throw new IllegalStateException("Invalid download");
+        }
+
         System.setProperty("hadoop.home.dir", hadoopHome.getAbsolutePath());
     }
 
+    // TODO: replace with gpg --verify?
+    // for now these are just basic sanity checks to ensure we use the files we expect
+    private static void sanityCheck(
+            final File winutils, final File ascFile, final File gpg)
+            throws IOException {
+
+        final byte[] asc = Files.readAllBytes(ascFile.toPath());
+        final byte[] expectedAsc = ("-----BEGIN PGP SIGNATURE-----\n"
+                + "Comment: GPGTools - https://gpgtools.org\n"
+                + "\n"
+                + "iQIcBAABCgAGBQJWeb5GAAoJEKkkVPkXR4a0qUgP/1u1Z5vV+IvU/8w79HIYX56+\n"
+                + "FHMRGxM5953dggqjhGSBtfx62YA8oxhDP+8qLpQWtfjTC3//CW1Oz5hrkL0m+Am5\n"
+                + "Kf+qiINDLqX3Fsc4wHQvnLMt2pJPmm4K9FtpkedCdAchLOiM6Wr7WtGiWYQAdUh0\n"
+                + "5FjUZLLVx95Kj3cTY+1B/BL+z/hB63Ry2AC29oZG4fCuAH1nTZjhH3vBD1/kzS+E\n"
+                + "LEKHrGh/pP6ADgg9AfJvVmRhidlCVi21ZfwWHAaitwDTMFvtFSGq03A3F6Xn2iyQ\n"
+                + "3H6RcZ8dqEbtUEa1jOh1xNGzqP4oipWe0KQJ/Lx2eiSh8te73k/Pfw1Ta9CuHXqk\n"
+                + "n8ko7cBc/pUm7nXbfjiURtWFJ4corT4oahJQna+GgvYR4BrYVLlSGb5VijTkzb7i\n"
+                + "0XU40BM5sOcDS/I0lkvqKP0mSi+mMJXbm10y0jw2S7KR7KeHLwzybsjco05DfWUD\n"
+                + "fSaCHK726g5SLsWJvZaurwna7+Mepzmo1HpAVy6nAuiAa2OQVIioNyFanIbuhbM3\n"
+                + "7PXBDWbfPOgr1WbYW4TASoepvsuJsAahYf2SlGagByOiDNliDHJi1z+ArfWsCFFh\n"
+                + "fAMMzPLKJwkmKPahyej3MrcywtntX68D7R8wTCAaj3xCxJsvX4IRv6YRk1+hQ2je\n"
+                + "EXQFW2c8nTI6XqtFpsbw\n"
+                + "=42+k\n"
+                + "-----END PGP SIGNATURE-----\n").getBytes("UTF-8");
+        if (!Arrays.equals(asc, expectedAsc)) {
+            throw new IllegalArgumentException(
+                    "Invalid asc file, did the repo get corrupted?");
+        }
+
+        final byte[] exe = Files.readAllBytes(winutils.toPath());
+        if (exe.length != 108032 || exe[0] != 77
+                || exe[exe.length - 1] != 0 || exe[exe.length / 3] != -127) {
+            throw new IllegalArgumentException(
+                    "Invalid winutils.exe file, did the repo get corrupted?");
+        }
+
+        // for now we ignore the gpg key since it is useless until we can use gpg tooling
+    }
+
     /**
      * Just a convenient win(File) invocation for tests.
      */
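
For context, a hedged usage sketch of the class changed above: the win(File) signature comes from the Javadoc in this diff, but the setup-class name and the "target/hadoop" folder are assumptions made only for illustration, and win is assumed to be static like the sanityCheck helper. The call has to happen before any Hadoop code reads hadoop.home.dir and must point at a folder that survives the tests:

import java.io.File;

import org.apache.beam.sdk.testing.HadoopWorkarounds;

public class WindowsHadoopSetup {
    public static void main(final String[] args) {
        // run before Hadoop caches hadoop.home.dir in its static state,
        // and use a folder that outlives individual tests
        HadoopWorkarounds.win(new File("target/hadoop"));
    }
}

The TODO above asks for a real gpg --verify; until that is wired in, a pinned digest would be another low-dependency sanity check. The sketch below only uses java.security.MessageDigest; EXPECTED_SHA256 is a placeholder, not the real digest of winutils.exe:

import java.io.File;
import java.io.IOException;
import java.nio.file.Files;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;

public class DigestCheck {
    // placeholder value, not the actual winutils.exe digest
    private static final String EXPECTED_SHA256 =
            "0000000000000000000000000000000000000000000000000000000000000000";

    public static void check(final File file) throws IOException {
        final MessageDigest sha256;
        try {
            sha256 = MessageDigest.getInstance("SHA-256");
        } catch (final NoSuchAlgorithmException e) {
            throw new IllegalStateException(e); // SHA-256 ships with every JRE
        }
        final byte[] hash = sha256.digest(Files.readAllBytes(file.toPath()));
        final StringBuilder hex = new StringBuilder();
        for (final byte b : hash) {
            hex.append(String.format("%02x", b)); // lower-case hex encoding
        }
        if (!EXPECTED_SHA256.equalsIgnoreCase(hex.toString())) {
            throw new IllegalArgumentException("Invalid download: " + file.getName());
        }
    }
}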

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/460d21cb/sdks/java/io/hdfs/pom.xml
----------------------------------------------------------------------
diff --git a/sdks/java/io/hdfs/pom.xml b/sdks/java/io/hdfs/pom.xml
index f8e3c14..9c30792 100644
--- a/sdks/java/io/hdfs/pom.xml
+++ b/sdks/java/io/hdfs/pom.xml
@@ -83,14 +83,5 @@
       <artifactId>junit</artifactId>
       <scope>test</scope>
     </dependency>
-
-    <!-- see HDFSFileSourceTest commented block
-    <dependency>
-      <groupId>org.apache.commons</groupId>
-      <artifactId>commons-compress</artifactId>
-      <version>1.9</version>
-      <scope>test</scope>
-    </dependency>
-    -->
   </dependencies>
 </project>

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/460d21cb/sdks/java/maven-archetypes/starter/pom.xml
----------------------------------------------------------------------
diff --git a/sdks/java/maven-archetypes/starter/pom.xml b/sdks/java/maven-archetypes/starter/pom.xml
index 9fb21e9..3d8267e 100644
--- a/sdks/java/maven-archetypes/starter/pom.xml
+++ b/sdks/java/maven-archetypes/starter/pom.xml
@@ -61,7 +61,11 @@
                 <goal>integration-test</goal>
               </goals>
               <configuration>
-                <ignoreEOLStyle>true</ignoreEOLStyle> <!-- for win -->
+                <!--
+                For Windows: the project files use \n while Windows produces \r\n during generation.
+                Since this doesn't affect the validity of the generated files (Java, XML), ignoring the EOL style is fine.
+                -->
+                <ignoreEOLStyle>true</ignoreEOLStyle>
               </configuration>
             </execution>
           </executions>