You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@beam.apache.org by rm...@apache.org on 2016/06/21 08:37:27 UTC
[2/2] incubator-beam git commit: better comments for win workaround
and basic sanity checks for winutils.exe
better comments for win workaround and basic sanity checks for winutils.exe
Project: http://git-wip-us.apache.org/repos/asf/incubator-beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-beam/commit/460d21cb
Tree: http://git-wip-us.apache.org/repos/asf/incubator-beam/tree/460d21cb
Diff: http://git-wip-us.apache.org/repos/asf/incubator-beam/diff/460d21cb
Branch: refs/heads/BEAM-357_windows-build-fails
Commit: 460d21cb7070603f789da9d13e12668194c91e9b
Parents: 4188330
Author: Romain manni-Bucau <rm...@gmail.com>
Authored: Tue Jun 21 10:37:05 2016 +0200
Committer: Romain manni-Bucau <rm...@gmail.com>
Committed: Tue Jun 21 10:37:05 2016 +0200
----------------------------------------------------------------------
.../beam/runners/flink/WriteSinkITCase.java | 2 +-
.../beam/sdk/testing/HadoopWorkarounds.java | 109 +++++++++++++++++--
sdks/java/io/hdfs/pom.xml | 9 --
sdks/java/maven-archetypes/starter/pom.xml | 6 +-
4 files changed, 104 insertions(+), 22 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/460d21cb/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/WriteSinkITCase.java
----------------------------------------------------------------------
diff --git a/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/WriteSinkITCase.java b/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/WriteSinkITCase.java
index 1a56350..bb3778d 100644
--- a/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/WriteSinkITCase.java
+++ b/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/WriteSinkITCase.java
@@ -54,7 +54,7 @@ public class WriteSinkITCase extends JavaProgramTestBase {
@Override
protected void preSubmit() throws Exception {
- resultPath = getTempDirPath("result");
+ resultPath = getTempDirPath("result-" + System.nanoTime());
}
@Override
http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/460d21cb/sdks/java/core/src/main/java/org/apache/beam/sdk/testing/HadoopWorkarounds.java
----------------------------------------------------------------------
diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/testing/HadoopWorkarounds.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/testing/HadoopWorkarounds.java
index ee2e135..1c2aa20 100644
--- a/sdks/java/core/src/main/java/org/apache/beam/sdk/testing/HadoopWorkarounds.java
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/testing/HadoopWorkarounds.java
@@ -17,6 +17,8 @@
*/
package org.apache.beam.sdk.testing;
+import static java.util.Arrays.asList;
+
import org.apache.commons.compress.utils.IOUtils;
import java.io.File;
@@ -26,15 +28,21 @@ import java.io.InputStream;
import java.io.OutputStream;
import java.net.MalformedURLException;
import java.net.URL;
+import java.nio.file.Files;
+import java.util.Arrays;
/**
* A simple class to ensure winutils.exe can be found in the JVM.
+ * <p>
+ * See http://wiki.apache.org/hadoop/WindowsProblems for details.
+ * <p>
+ * Note: don't forget to add org.bouncycastle:bcpg-jdk16 dependency to use it.
*/
public class HadoopWorkarounds {
/**
* In practice this method only needs to be called once per JVM
* since hadoop uses static variables to store it.
- *
+ * <p>
* Note: ensure invocation is done before hadoop reads it
* and ensure this folder survives tests
* (avoid temporary folder usage since tests can share it).
@@ -51,6 +59,8 @@ public class HadoopWorkarounds {
// hadoop doesn't have winutils.exe :(: https://issues.apache.org/jira/browse/HADOOP-10051
// so use this github repo temporarily, then just use the main tar.gz
/*
+ // note this commented code requires commons-compress dependency (to add if we use that)
+
String hadoopVersion = VersionInfo.getVersion();
final URL url = new URL("https://archive.apache.org/dist/hadoop/common/
hadoop-" + hadoopVersion + "/hadoop-" + hadoopVersion + ".tar.gz");
@@ -97,19 +107,49 @@ public class HadoopWorkarounds {
+ "-Dhadoop.home.dir so we'll download winutils.exe");
new File(hadoopHome, "bin").mkdirs();
- final URL url;
- try {
- url = new URL("https://github.com/steveloughran/winutils/"
- + "raw/master/hadoop-2.7.1/bin/winutils.exe");
- } catch (final MalformedURLException e) { // unlikely
- throw new IllegalArgumentException(e);
+ final File winutils = new File(hadoopHome, "bin/winutils.exe");
+
+ for (final String suffix : asList("", ".asc")) {
+ final URL url;
+ try {
+ // this code is not a random URL - read HADOOP-10051
+ // it is provided and signed with an ASF gpg key.
+
+ // note: we use 2.6.3 because 2.6.4 and 2.7.1 don't have a .asc
+ url = new URL("https://github.com/steveloughran/winutils/"
+ + "raw/master/hadoop-2.6.3/bin/winutils.exe" + suffix);
+ } catch (final MalformedURLException e) { // unlikely
+ throw new IllegalArgumentException(e);
+ }
+
+ // download winutils.exe
+ try {
+ try (final InputStream is = url.openStream();
+ final OutputStream os = new FileOutputStream(
+ new File(hadoopHome, "bin/winutils.exe" + suffix))) {
+ try {
+ IOUtils.copy(is, os, 1024 * 1024);
+ } catch (final IOException e) {
+ throw new IllegalStateException(e);
+ }
+ }
+ } catch (final IOException e) {
+ throw new IllegalStateException(e);
+ }
}
+
+ // get the gpg key which is supposed to have signed the winutils.exe
+ final File gpg = new File(hadoopHome, "bin/gpg");
try {
- try (final InputStream is = url.openStream();
- final OutputStream os = new FileOutputStream(
- new File(hadoopHome, "bin/winutils.exe"))) {
+ /*
+ key is https://github.com/steveloughran/winutils/blob/master/KEYS
+ but we trust the ASF, not github, so use the one we trust.
+ */
+ final URL gpgUrl = new URL("http://home.apache.org/keys/committer/stevel");
+ try (final InputStream is = gpgUrl.openStream();
+ final OutputStream os = new FileOutputStream(gpg)) {
try {
- IOUtils.copy(is, os, 1024 * 1024);
+ IOUtils.copy(is, os);
} catch (final IOException e) {
throw new IllegalStateException(e);
}
@@ -117,9 +157,56 @@ public class HadoopWorkarounds {
} catch (final IOException e) {
throw new IllegalStateException(e);
}
+
+ final File ascFile = new File(winutils.getParentFile(), winutils.getName() + ".asc");
+ try {
+ sanityCheck(winutils, ascFile, gpg);
+ } catch (IOException e) {
+ throw new IllegalStateException("Invalid download");
+ }
+
System.setProperty("hadoop.home.dir", hadoopHome.getAbsolutePath());
}
+ // TODO: replace with gpg --verify?
+ // for now it is just some basic sanity checks to ensure we use the files we expect
+ private static void sanityCheck(
+ final File winutils, final File ascFile, final File gpg)
+ throws IOException {
+
+ final byte[] asc = Files.readAllBytes(ascFile.toPath());
+ final byte[] expectedAsc = ("-----BEGIN PGP SIGNATURE-----\n"
+ + "Comment: GPGTools - https://gpgtools.org\n"
+ + "\n"
+ + "iQIcBAABCgAGBQJWeb5GAAoJEKkkVPkXR4a0qUgP/1u1Z5vV+IvU/8w79HIYX56+\n"
+ + "FHMRGxM5953dggqjhGSBtfx62YA8oxhDP+8qLpQWtfjTC3//CW1Oz5hrkL0m+Am5\n"
+ + "Kf+qiINDLqX3Fsc4wHQvnLMt2pJPmm4K9FtpkedCdAchLOiM6Wr7WtGiWYQAdUh0\n"
+ + "5FjUZLLVx95Kj3cTY+1B/BL+z/hB63Ry2AC29oZG4fCuAH1nTZjhH3vBD1/kzS+E\n"
+ + "LEKHrGh/pP6ADgg9AfJvVmRhidlCVi21ZfwWHAaitwDTMFvtFSGq03A3F6Xn2iyQ\n"
+ + "3H6RcZ8dqEbtUEa1jOh1xNGzqP4oipWe0KQJ/Lx2eiSh8te73k/Pfw1Ta9CuHXqk\n"
+ + "n8ko7cBc/pUm7nXbfjiURtWFJ4corT4oahJQna+GgvYR4BrYVLlSGb5VijTkzb7i\n"
+ + "0XU40BM5sOcDS/I0lkvqKP0mSi+mMJXbm10y0jw2S7KR7KeHLwzybsjco05DfWUD\n"
+ + "fSaCHK726g5SLsWJvZaurwna7+Mepzmo1HpAVy6nAuiAa2OQVIioNyFanIbuhbM3\n"
+ + "7PXBDWbfPOgr1WbYW4TASoepvsuJsAahYf2SlGagByOiDNliDHJi1z+ArfWsCFFh\n"
+ + "fAMMzPLKJwkmKPahyej3MrcywtntX68D7R8wTCAaj3xCxJsvX4IRv6YRk1+hQ2je\n"
+ + "EXQFW2c8nTI6XqtFpsbw\n"
+ + "=42+k\n"
+ + "-----END PGP SIGNATURE-----\n").getBytes("UTF-8");
+ if (!Arrays.equals(asc, expectedAsc)) {
+ throw new IllegalArgumentException(
+ "Invalid asc file, did the repo get corrupted?");
+ }
+
+ final byte[] exe = Files.readAllBytes(winutils.toPath());
+ if (exe.length != 108032 || exe[0] != 77
+ || exe[exe.length - 1] != 0 || exe[exe.length / 3] != -127) {
+ throw new IllegalArgumentException(
+ "Invalid winutils.exe file, did the repo get corrupted?");
+ }
+
+ // for now we ignore gpg because it is useless until we can use gpg tools
+ }
+
/**
* Just a convenient win(File) invocation for tests.
*/
http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/460d21cb/sdks/java/io/hdfs/pom.xml
----------------------------------------------------------------------
diff --git a/sdks/java/io/hdfs/pom.xml b/sdks/java/io/hdfs/pom.xml
index f8e3c14..9c30792 100644
--- a/sdks/java/io/hdfs/pom.xml
+++ b/sdks/java/io/hdfs/pom.xml
@@ -83,14 +83,5 @@
<artifactId>junit</artifactId>
<scope>test</scope>
</dependency>
-
- <!-- see HDFSFileSourceTest commented block
- <dependency>
- <groupId>org.apache.commons</groupId>
- <artifactId>commons-compress</artifactId>
- <version>1.9</version>
- <scope>test</scope>
- </dependency>
- -->
</dependencies>
</project>
http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/460d21cb/sdks/java/maven-archetypes/starter/pom.xml
----------------------------------------------------------------------
diff --git a/sdks/java/maven-archetypes/starter/pom.xml b/sdks/java/maven-archetypes/starter/pom.xml
index 9fb21e9..3d8267e 100644
--- a/sdks/java/maven-archetypes/starter/pom.xml
+++ b/sdks/java/maven-archetypes/starter/pom.xml
@@ -61,7 +61,11 @@
<goal>integration-test</goal>
</goals>
<configuration>
- <ignoreEOLStyle>true</ignoreEOLStyle> <!-- for win -->
+ <!--
+ Needed on Windows since project files use \n while Windows uses \r\n during the generation.
+ Since it doesn't change the validity of the generated files (java, xml) we are fine doing it.
+ -->
+ <ignoreEOLStyle>true</ignoreEOLStyle>
</configuration>
</execution>
</executions>