You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@beam.apache.org by ke...@apache.org on 2017/05/18 20:08:07 UTC
[07/50] [abbrv] beam git commit: [BEAM-2277] Add ResourceIdTester and
test existing ResourceId implementations
[BEAM-2277] Add ResourceIdTester and test existing ResourceId implementations
A first cut at some of the parts of the ResourceId spec.
Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/a6a5ff7b
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/a6a5ff7b
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/a6a5ff7b
Branch: refs/heads/gearpump-runner
Commit: a6a5ff7be387ef295fc7f921de36a3ea77327bc1
Parents: fbb0de1
Author: Dan Halperin <dh...@google.com>
Authored: Fri May 12 09:20:34 2017 -0700
Committer: Dan Halperin <dh...@google.com>
Committed: Fri May 12 14:59:10 2017 -0700
----------------------------------------------------------------------
.../apache/beam/sdk/io/LocalResourceIdTest.java | 6 +
.../apache/beam/sdk/io/fs/ResourceIdTester.java | 151 +++++++++++++++++++
.../google-cloud-platform-core/pom.xml | 6 +
.../gcp/storage/GcsResourceIdTest.java | 9 ++
sdks/java/io/hadoop-file-system/pom.xml | 13 ++
.../beam/sdk/io/hdfs/HadoopResourceIdTest.java | 63 ++++++++
6 files changed, 248 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/beam/blob/a6a5ff7b/sdks/java/core/src/test/java/org/apache/beam/sdk/io/LocalResourceIdTest.java
----------------------------------------------------------------------
diff --git a/sdks/java/core/src/test/java/org/apache/beam/sdk/io/LocalResourceIdTest.java b/sdks/java/core/src/test/java/org/apache/beam/sdk/io/LocalResourceIdTest.java
index 7ea85cf..e1ca303 100644
--- a/sdks/java/core/src/test/java/org/apache/beam/sdk/io/LocalResourceIdTest.java
+++ b/sdks/java/core/src/test/java/org/apache/beam/sdk/io/LocalResourceIdTest.java
@@ -31,6 +31,7 @@ import java.io.File;
import java.nio.file.Paths;
import org.apache.beam.sdk.io.fs.ResolveOptions.StandardResolveOptions;
import org.apache.beam.sdk.io.fs.ResourceId;
+import org.apache.beam.sdk.io.fs.ResourceIdTester;
import org.apache.commons.lang3.SystemUtils;
import org.junit.Rule;
import org.junit.Test;
@@ -259,6 +260,11 @@ public class LocalResourceIdTest {
"xyz.txt");
}
+ @Test
+ public void testResourceIdTester() throws Exception {
+ ResourceIdTester.runResourceIdBattery(toResourceIdentifier("/tmp/foo/"));
+ }
+
private LocalResourceId toResourceIdentifier(String str) throws Exception {
boolean isDirectory;
if (SystemUtils.IS_OS_WINDOWS) {
http://git-wip-us.apache.org/repos/asf/beam/blob/a6a5ff7b/sdks/java/core/src/test/java/org/apache/beam/sdk/io/fs/ResourceIdTester.java
----------------------------------------------------------------------
diff --git a/sdks/java/core/src/test/java/org/apache/beam/sdk/io/fs/ResourceIdTester.java b/sdks/java/core/src/test/java/org/apache/beam/sdk/io/fs/ResourceIdTester.java
new file mode 100644
index 0000000..fe50ada
--- /dev/null
+++ b/sdks/java/core/src/test/java/org/apache/beam/sdk/io/fs/ResourceIdTester.java
@@ -0,0 +1,151 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.sdk.io.fs;
+
+import static com.google.common.base.Preconditions.checkArgument;
+import static org.apache.beam.sdk.io.fs.ResolveOptions.StandardResolveOptions.RESOLVE_DIRECTORY;
+import static org.apache.beam.sdk.io.fs.ResolveOptions.StandardResolveOptions.RESOLVE_FILE;
+import static org.hamcrest.Matchers.equalTo;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertThat;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+
+import com.google.common.testing.EqualsTester;
+import java.util.ArrayList;
+import java.util.List;
+import org.apache.beam.sdk.annotations.Experimental;
+import org.apache.beam.sdk.annotations.Experimental.Kind;
+import org.apache.beam.sdk.io.FileSystems;
+
+/**
+ * A utility to test {@link ResourceId} implementations.
+ */
+@Experimental(Kind.FILESYSTEM)
+public final class ResourceIdTester {
+ /**
+ * Enforces that the {@link ResourceId} implementation of {@code baseDirectory} meets the
+ * {@link ResourceId} spec.
+ */
+ public static void runResourceIdBattery(ResourceId baseDirectory) {
+ checkArgument(
+ baseDirectory.isDirectory(), "baseDirectory %s is not a directory", baseDirectory);
+
+ List<ResourceId> allResourceIds = new ArrayList<>();
+ allResourceIds.add(baseDirectory);
+
+ // Validate that individual resources meet the fairly restrictive spec we have.
+ validateResourceIds(allResourceIds);
+
+ // Validate operations with resolving child resources.
+ validateResolvingIds(baseDirectory, allResourceIds);
+
+ // Validate safeguards against resolving bad paths.
+ validateFailureResolvingIds(baseDirectory);
+ }
+
+ private static void validateResolvingIds(
+ ResourceId baseDirectory, List<ResourceId> allResourceIds) {
+ ResourceId file1 = baseDirectory.resolve("child1", RESOLVE_FILE);
+ ResourceId file2 = baseDirectory.resolve("child2", RESOLVE_FILE);
+ ResourceId file2a = baseDirectory.resolve("child2", RESOLVE_FILE);
+ allResourceIds.add(file1);
+ allResourceIds.add(file2);
+ assertFalse("Resolved file isDirectory()", file1.isDirectory());
+ assertFalse("Resolved file isDirectory()", file2.isDirectory());
+ assertFalse("Resolved file isDirectory()", file2a.isDirectory());
+
+ ResourceId dir1 = baseDirectory.resolve("child1", RESOLVE_DIRECTORY);
+ ResourceId dir2 = baseDirectory.resolve("child2", RESOLVE_DIRECTORY);
+ ResourceId dir2a = baseDirectory.resolve("child2", RESOLVE_DIRECTORY);
+ assertTrue("Resolved directory isDirectory()", dir1.isDirectory());
+ assertTrue("Resolved directory isDirectory()", dir2.isDirectory());
+ assertTrue("Resolved directory isDirectory()", dir2a.isDirectory());
+ allResourceIds.add(dir1);
+ allResourceIds.add(dir2);
+
+ // ResourceIds in equality groups.
+ new EqualsTester()
+ .addEqualityGroup(file1)
+ .addEqualityGroup(file2, file2a)
+ .addEqualityGroup(dir1, dir1.getCurrentDirectory())
+ .addEqualityGroup(dir2, dir2a, dir2.getCurrentDirectory())
+ .addEqualityGroup(baseDirectory, file1.getCurrentDirectory(), file2.getCurrentDirectory())
+ .testEquals();
+
+ // ResourceId toString() in equality groups.
+ new EqualsTester()
+ .addEqualityGroup(file1.toString())
+ .addEqualityGroup(file2.toString(), file2a.toString())
+ .addEqualityGroup(dir1.toString(), dir1.getCurrentDirectory().toString())
+ .addEqualityGroup(dir2.toString(), dir2a.toString(), dir2.getCurrentDirectory().toString())
+ .addEqualityGroup(
+ baseDirectory.toString(),
+ file1.getCurrentDirectory().toString(),
+ file2.getCurrentDirectory().toString())
+ .testEquals();
+
+ // TODO: test resolving strings that need to be escaped.
+ // Possible spec: https://tools.ietf.org/html/rfc3986#section-2
+ // May need options to be filesystem-independent, e.g., if filesystems ban certain chars.
+ }
+
+ private static void validateFailureResolvingIds(ResourceId baseDirectory) {
+ try {
+ ResourceId badFile = baseDirectory.resolve("file/", RESOLVE_FILE);
+ fail(String.format("Resolving badFile %s should have failed", badFile));
+ } catch (Throwable t) {
+ // expected
+ }
+
+ ResourceId file = baseDirectory.resolve("file", RESOLVE_FILE);
+ try {
+ baseDirectory.resolve("file2", RESOLVE_FILE);
+ fail(String.format("Should not be able to resolve against file resource %s", file));
+ } catch (Throwable t) {
+ // expected
+ }
+ }
+
+ private static void validateResourceIds(List<ResourceId> resourceIds) {
+ for (ResourceId resourceId : resourceIds) {
+ // ResourceIds should equal themselves.
+ assertThat("ResourceId equal to itself", resourceId, equalTo(resourceId));
+
+ // ResourceIds should be clonable via FileSystems#matchNewResource.
+ ResourceId cloned;
+ if (resourceId.isDirectory()) {
+ cloned = FileSystems.matchNewResource(resourceId.toString(), true /* isDirectory */);
+ } else {
+ cloned = FileSystems.matchNewResource(resourceId.toString(), false /* isDirectory */);
+ }
+ assertThat(
+ "ResourceId equals clone of itself", cloned, equalTo(resourceId));
+ // .. and clones have consistent toString.
+ assertThat(
+ "ResourceId toString consistency", cloned.toString(), equalTo(resourceId.toString()));
+ // .. and have consistent isDirectory.
+ assertThat(
+ "ResourceId isDirectory consistency",
+ cloned.isDirectory(),
+ equalTo(resourceId.isDirectory()));
+ }
+ }
+
+ private ResourceIdTester() {} // prevent instantiation
+}
http://git-wip-us.apache.org/repos/asf/beam/blob/a6a5ff7b/sdks/java/extensions/google-cloud-platform-core/pom.xml
----------------------------------------------------------------------
diff --git a/sdks/java/extensions/google-cloud-platform-core/pom.xml b/sdks/java/extensions/google-cloud-platform-core/pom.xml
index a1baea1..e4e951b 100644
--- a/sdks/java/extensions/google-cloud-platform-core/pom.xml
+++ b/sdks/java/extensions/google-cloud-platform-core/pom.xml
@@ -152,6 +152,12 @@
<!-- test dependencies -->
<dependency>
+ <groupId>com.google.guava</groupId>
+ <artifactId>guava-testlib</artifactId>
+ <scope>test</scope>
+ </dependency>
+
+ <dependency>
<groupId>org.apache.beam</groupId>
<artifactId>beam-sdks-java-core</artifactId>
<classifier>tests</classifier>
http://git-wip-us.apache.org/repos/asf/beam/blob/a6a5ff7b/sdks/java/extensions/google-cloud-platform-core/src/test/java/org/apache/beam/sdk/extensions/gcp/storage/GcsResourceIdTest.java
----------------------------------------------------------------------
diff --git a/sdks/java/extensions/google-cloud-platform-core/src/test/java/org/apache/beam/sdk/extensions/gcp/storage/GcsResourceIdTest.java b/sdks/java/extensions/google-cloud-platform-core/src/test/java/org/apache/beam/sdk/extensions/gcp/storage/GcsResourceIdTest.java
index b245610..2a67501 100644
--- a/sdks/java/extensions/google-cloud-platform-core/src/test/java/org/apache/beam/sdk/extensions/gcp/storage/GcsResourceIdTest.java
+++ b/sdks/java/extensions/google-cloud-platform-core/src/test/java/org/apache/beam/sdk/extensions/gcp/storage/GcsResourceIdTest.java
@@ -22,8 +22,11 @@ import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotEquals;
import static org.junit.Assert.assertTrue;
+import org.apache.beam.sdk.io.FileSystems;
import org.apache.beam.sdk.io.fs.ResolveOptions.StandardResolveOptions;
import org.apache.beam.sdk.io.fs.ResourceId;
+import org.apache.beam.sdk.io.fs.ResourceIdTester;
+import org.apache.beam.sdk.testing.TestPipeline;
import org.apache.beam.sdk.util.gcsfs.GcsPath;
import org.junit.Rule;
import org.junit.Test;
@@ -163,6 +166,12 @@ public class GcsResourceIdTest {
"xyz.txt");
}
+ @Test
+ public void testResourceIdTester() throws Exception {
+ FileSystems.setDefaultConfigInWorkers(TestPipeline.testingPipelineOptions());
+ ResourceIdTester.runResourceIdBattery(toResourceIdentifier("gs://bucket/foo/"));
+ }
+
private GcsResourceId toResourceIdentifier(String str) throws Exception {
  // Parse the URI string into a GcsPath, then wrap that path as a GcsResourceId.
  GcsPath parsedPath = GcsPath.fromUri(str);
  return GcsResourceId.fromGcsPath(parsedPath);
}
http://git-wip-us.apache.org/repos/asf/beam/blob/a6a5ff7b/sdks/java/io/hadoop-file-system/pom.xml
----------------------------------------------------------------------
diff --git a/sdks/java/io/hadoop-file-system/pom.xml b/sdks/java/io/hadoop-file-system/pom.xml
index 423237b..db5a1db 100644
--- a/sdks/java/io/hadoop-file-system/pom.xml
+++ b/sdks/java/io/hadoop-file-system/pom.xml
@@ -157,6 +157,19 @@
</dependency>
<dependency>
+ <groupId>org.apache.beam</groupId>
+ <artifactId>beam-sdks-java-core</artifactId>
+ <classifier>tests</classifier>
+ <scope>test</scope>
+ </dependency>
+
+ <dependency>
+ <groupId>com.google.guava</groupId>
+ <artifactId>guava-testlib</artifactId>
+ <scope>test</scope>
+ </dependency>
+
+ <dependency>
<groupId>org.hamcrest</groupId>
<artifactId>hamcrest-all</artifactId>
<scope>test</scope>
http://git-wip-us.apache.org/repos/asf/beam/blob/a6a5ff7b/sdks/java/io/hadoop-file-system/src/test/java/org/apache/beam/sdk/io/hdfs/HadoopResourceIdTest.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/hadoop-file-system/src/test/java/org/apache/beam/sdk/io/hdfs/HadoopResourceIdTest.java b/sdks/java/io/hadoop-file-system/src/test/java/org/apache/beam/sdk/io/hdfs/HadoopResourceIdTest.java
new file mode 100644
index 0000000..b0d821b
--- /dev/null
+++ b/sdks/java/io/hadoop-file-system/src/test/java/org/apache/beam/sdk/io/hdfs/HadoopResourceIdTest.java
@@ -0,0 +1,63 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.sdk.io.hdfs;
+
+import java.net.URI;
+import org.apache.beam.sdk.io.FileSystems;
+import org.apache.beam.sdk.io.fs.ResourceIdTester;
+import org.apache.beam.sdk.testing.TestPipeline;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.TemporaryFolder;
+
+/**
+ * Tests for {@link HadoopResourceId}.
+ *
+ * <p>Each test runs against a {@link MiniDFSCluster} whose base directory is a JUnit
+ * {@link TemporaryFolder}.
+ */
+public class HadoopResourceIdTest {
+  private Configuration configuration;
+  private MiniDFSCluster hdfsCluster;
+  private URI hdfsClusterBaseUri;
+  // NOTE(review): assigned in setUp but not read by any test visible in this class.
+  private HadoopFileSystem fileSystem;
+  @Rule
+  public final TemporaryFolder tmpFolder = new TemporaryFolder();
+
+  /** Starts a mini HDFS cluster rooted in the temporary folder and records its base URI. */
+  @Before
+  public void setUp() throws Exception {
+    configuration = new Configuration();
+    configuration.set(MiniDFSCluster.HDFS_MINIDFS_BASEDIR, tmpFolder.getRoot().getAbsolutePath());
+    MiniDFSCluster.Builder builder = new MiniDFSCluster.Builder(configuration);
+    hdfsCluster = builder.build();
+    // Read "fs.defaultFS" only after the cluster is built (presumably the builder sets it --
+    // confirm); the trailing "/" is appended to form the base URI handed to HadoopResourceId.
+    hdfsClusterBaseUri = new URI(configuration.get("fs.defaultFS") + "/");
+    fileSystem = new HadoopFileSystem(configuration);
+  }
+
+  /** Shuts the mini cluster down, if it was started. */
+  @After
+  public void tearDown() throws Exception {
+    // JUnit runs @After even when @Before throws; guard so a failed cluster start is not
+    // compounded by a NullPointerException here.
+    if (hdfsCluster != null) {
+      hdfsCluster.shutdown();
+    }
+  }
+
+  /** Runs the shared {@link ResourceIdTester} battery against the cluster's base URI. */
+  @Test
+  public void testResourceIdTester() throws Exception {
+    FileSystems.setDefaultConfigInWorkers(TestPipeline.testingPipelineOptions());
+    ResourceIdTester.runResourceIdBattery(new HadoopResourceId(hdfsClusterBaseUri));
+  }
+}