You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@beam.apache.org by ke...@apache.org on 2017/05/18 20:08:07 UTC

[07/50] [abbrv] beam git commit: [BEAM-2277] Add ResourceIdTester and test existing ResourceId implementations

[BEAM-2277] Add ResourceIdTester and test existing ResourceId implementations

A first cut at some of the parts of the ResourceId spec.


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/a6a5ff7b
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/a6a5ff7b
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/a6a5ff7b

Branch: refs/heads/gearpump-runner
Commit: a6a5ff7be387ef295fc7f921de36a3ea77327bc1
Parents: fbb0de1
Author: Dan Halperin <dh...@google.com>
Authored: Fri May 12 09:20:34 2017 -0700
Committer: Dan Halperin <dh...@google.com>
Committed: Fri May 12 14:59:10 2017 -0700

----------------------------------------------------------------------
 .../apache/beam/sdk/io/LocalResourceIdTest.java |   6 +
 .../apache/beam/sdk/io/fs/ResourceIdTester.java | 151 +++++++++++++++++++
 .../google-cloud-platform-core/pom.xml          |   6 +
 .../gcp/storage/GcsResourceIdTest.java          |   9 ++
 sdks/java/io/hadoop-file-system/pom.xml         |  13 ++
 .../beam/sdk/io/hdfs/HadoopResourceIdTest.java  |  63 ++++++++
 6 files changed, 248 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/beam/blob/a6a5ff7b/sdks/java/core/src/test/java/org/apache/beam/sdk/io/LocalResourceIdTest.java
----------------------------------------------------------------------
diff --git a/sdks/java/core/src/test/java/org/apache/beam/sdk/io/LocalResourceIdTest.java b/sdks/java/core/src/test/java/org/apache/beam/sdk/io/LocalResourceIdTest.java
index 7ea85cf..e1ca303 100644
--- a/sdks/java/core/src/test/java/org/apache/beam/sdk/io/LocalResourceIdTest.java
+++ b/sdks/java/core/src/test/java/org/apache/beam/sdk/io/LocalResourceIdTest.java
@@ -31,6 +31,7 @@ import java.io.File;
 import java.nio.file.Paths;
 import org.apache.beam.sdk.io.fs.ResolveOptions.StandardResolveOptions;
 import org.apache.beam.sdk.io.fs.ResourceId;
+import org.apache.beam.sdk.io.fs.ResourceIdTester;
 import org.apache.commons.lang3.SystemUtils;
 import org.junit.Rule;
 import org.junit.Test;
@@ -259,6 +260,11 @@ public class LocalResourceIdTest {
         "xyz.txt");
   }
 
+  @Test
+  public void testResourceIdTester() throws Exception {
+    ResourceIdTester.runResourceIdBattery(toResourceIdentifier("/tmp/foo/"));
+  }
+
   private LocalResourceId toResourceIdentifier(String str) throws Exception {
     boolean isDirectory;
     if (SystemUtils.IS_OS_WINDOWS) {

http://git-wip-us.apache.org/repos/asf/beam/blob/a6a5ff7b/sdks/java/core/src/test/java/org/apache/beam/sdk/io/fs/ResourceIdTester.java
----------------------------------------------------------------------
diff --git a/sdks/java/core/src/test/java/org/apache/beam/sdk/io/fs/ResourceIdTester.java b/sdks/java/core/src/test/java/org/apache/beam/sdk/io/fs/ResourceIdTester.java
new file mode 100644
index 0000000..fe50ada
--- /dev/null
+++ b/sdks/java/core/src/test/java/org/apache/beam/sdk/io/fs/ResourceIdTester.java
@@ -0,0 +1,151 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.sdk.io.fs;
+
+import static com.google.common.base.Preconditions.checkArgument;
+import static org.apache.beam.sdk.io.fs.ResolveOptions.StandardResolveOptions.RESOLVE_DIRECTORY;
+import static org.apache.beam.sdk.io.fs.ResolveOptions.StandardResolveOptions.RESOLVE_FILE;
+import static org.hamcrest.Matchers.equalTo;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertThat;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+
+import com.google.common.testing.EqualsTester;
+import java.util.ArrayList;
+import java.util.List;
+import org.apache.beam.sdk.annotations.Experimental;
+import org.apache.beam.sdk.annotations.Experimental.Kind;
+import org.apache.beam.sdk.io.FileSystems;
+
+/**
+ * A utility to test {@link ResourceId} implementations.
+ */
+@Experimental(Kind.FILESYSTEM)
+public final class ResourceIdTester {
+  /**
+   * Enforces that the {@link ResourceId} implementation of {@code baseDirectory} meets the
+   * {@link ResourceId} spec.
+   */
+  public static void runResourceIdBattery(ResourceId baseDirectory) {
+    checkArgument(
+        baseDirectory.isDirectory(), "baseDirectory %s is not a directory", baseDirectory);
+
+    List<ResourceId> allResourceIds = new ArrayList<>();
+    allResourceIds.add(baseDirectory);
+
+    // Validate that individual resources meet the fairly restrictive spec we have.
+    validateResourceIds(allResourceIds);
+
+    // Validate operations with resolving child resources.
+    validateResolvingIds(baseDirectory, allResourceIds);
+
+    // Validate safeguards against resolving bad paths.
+    validateFailureResolvingIds(baseDirectory);
+  }
+
+  private static void validateResolvingIds(
+      ResourceId baseDirectory, List<ResourceId> allResourceIds) {
+    ResourceId file1 = baseDirectory.resolve("child1", RESOLVE_FILE);
+    ResourceId file2 = baseDirectory.resolve("child2", RESOLVE_FILE);
+    ResourceId file2a = baseDirectory.resolve("child2", RESOLVE_FILE);
+    allResourceIds.add(file1);
+    allResourceIds.add(file2);
+    assertFalse("Resolved file isDirectory()", file1.isDirectory());
+    assertFalse("Resolved file isDirectory()", file2.isDirectory());
+    assertFalse("Resolved file isDirectory()", file2a.isDirectory());
+
+    ResourceId dir1 = baseDirectory.resolve("child1", RESOLVE_DIRECTORY);
+    ResourceId dir2 = baseDirectory.resolve("child2", RESOLVE_DIRECTORY);
+    ResourceId dir2a = baseDirectory.resolve("child2", RESOLVE_DIRECTORY);
+    assertTrue("Resolved directory isDirectory()", dir1.isDirectory());
+    assertTrue("Resolved directory isDirectory()", dir2.isDirectory());
+    assertTrue("Resolved directory isDirectory()", dir2a.isDirectory());
+    allResourceIds.add(dir1);
+    allResourceIds.add(dir2);
+
+    // ResourceIds in equality groups.
+    new EqualsTester()
+        .addEqualityGroup(file1)
+        .addEqualityGroup(file2, file2a)
+        .addEqualityGroup(dir1, dir1.getCurrentDirectory())
+        .addEqualityGroup(dir2, dir2a, dir2.getCurrentDirectory())
+        .addEqualityGroup(baseDirectory, file1.getCurrentDirectory(), file2.getCurrentDirectory())
+        .testEquals();
+
+    // ResourceId toString() in equality groups.
+    new EqualsTester()
+        .addEqualityGroup(file1.toString())
+        .addEqualityGroup(file2.toString(), file2a.toString())
+        .addEqualityGroup(dir1.toString(), dir1.getCurrentDirectory().toString())
+        .addEqualityGroup(dir2.toString(), dir2a.toString(), dir2.getCurrentDirectory().toString())
+        .addEqualityGroup(
+            baseDirectory.toString(),
+            file1.getCurrentDirectory().toString(),
+            file2.getCurrentDirectory().toString())
+        .testEquals();
+
+    // TODO: test resolving strings that need to be escaped.
+    //   Possible spec: https://tools.ietf.org/html/rfc3986#section-2
+    //   May need options to be filesystem-independent, e.g., if filesystems ban certain chars.
+  }
+
+  private static void validateFailureResolvingIds(ResourceId baseDirectory) {
+    try {
+      ResourceId badFile = baseDirectory.resolve("file/", RESOLVE_FILE);
+      fail(String.format("Resolving badFile %s should have failed", badFile));
+    } catch (Throwable t) {
+      // expected
+    }
+
+    ResourceId file = baseDirectory.resolve("file", RESOLVE_FILE);
+    try {
+      baseDirectory.resolve("file2", RESOLVE_FILE);
+      fail(String.format("Should not be able to resolve against file resource %s", file));
+    } catch (Throwable t) {
+      // expected
+    }
+  }
+
+  private static void validateResourceIds(List<ResourceId> resourceIds) {
+    for (ResourceId resourceId : resourceIds) {
+      // ResourceIds should equal themselves.
+      assertThat("ResourceId equal to itself", resourceId, equalTo(resourceId));
+
+      // ResourceIds should be clonable via FileSystems#matchNewResource.
+      ResourceId cloned;
+      if (resourceId.isDirectory()) {
+        cloned = FileSystems.matchNewResource(resourceId.toString(), true /* isDirectory */);
+      } else {
+        cloned = FileSystems.matchNewResource(resourceId.toString(), false /* isDirectory */);
+      }
+      assertThat(
+          "ResourceId equals clone of itself", cloned, equalTo(resourceId));
+      // .. and clones have consistent toString.
+      assertThat(
+          "ResourceId toString consistency", cloned.toString(), equalTo(resourceId.toString()));
+      // .. and have consistent isDirectory.
+      assertThat(
+          "ResourceId isDirectory consistency",
+          cloned.isDirectory(),
+          equalTo(resourceId.isDirectory()));
+    }
+  }
+
+  private ResourceIdTester() {} // prevent instantiation
+}

http://git-wip-us.apache.org/repos/asf/beam/blob/a6a5ff7b/sdks/java/extensions/google-cloud-platform-core/pom.xml
----------------------------------------------------------------------
diff --git a/sdks/java/extensions/google-cloud-platform-core/pom.xml b/sdks/java/extensions/google-cloud-platform-core/pom.xml
index a1baea1..e4e951b 100644
--- a/sdks/java/extensions/google-cloud-platform-core/pom.xml
+++ b/sdks/java/extensions/google-cloud-platform-core/pom.xml
@@ -152,6 +152,12 @@
 
     <!-- test dependencies -->
     <dependency>
+      <groupId>com.google.guava</groupId>
+      <artifactId>guava-testlib</artifactId>
+      <scope>test</scope>
+    </dependency>
+
+    <dependency>
       <groupId>org.apache.beam</groupId>
       <artifactId>beam-sdks-java-core</artifactId>
       <classifier>tests</classifier>

http://git-wip-us.apache.org/repos/asf/beam/blob/a6a5ff7b/sdks/java/extensions/google-cloud-platform-core/src/test/java/org/apache/beam/sdk/extensions/gcp/storage/GcsResourceIdTest.java
----------------------------------------------------------------------
diff --git a/sdks/java/extensions/google-cloud-platform-core/src/test/java/org/apache/beam/sdk/extensions/gcp/storage/GcsResourceIdTest.java b/sdks/java/extensions/google-cloud-platform-core/src/test/java/org/apache/beam/sdk/extensions/gcp/storage/GcsResourceIdTest.java
index b245610..2a67501 100644
--- a/sdks/java/extensions/google-cloud-platform-core/src/test/java/org/apache/beam/sdk/extensions/gcp/storage/GcsResourceIdTest.java
+++ b/sdks/java/extensions/google-cloud-platform-core/src/test/java/org/apache/beam/sdk/extensions/gcp/storage/GcsResourceIdTest.java
@@ -22,8 +22,11 @@ import static org.junit.Assert.assertFalse;
 import static org.junit.Assert.assertNotEquals;
 import static org.junit.Assert.assertTrue;
 
+import org.apache.beam.sdk.io.FileSystems;
 import org.apache.beam.sdk.io.fs.ResolveOptions.StandardResolveOptions;
 import org.apache.beam.sdk.io.fs.ResourceId;
+import org.apache.beam.sdk.io.fs.ResourceIdTester;
+import org.apache.beam.sdk.testing.TestPipeline;
 import org.apache.beam.sdk.util.gcsfs.GcsPath;
 import org.junit.Rule;
 import org.junit.Test;
@@ -163,6 +166,12 @@ public class GcsResourceIdTest {
         "xyz.txt");
   }
 
+  @Test
+  public void testResourceIdTester() throws Exception {
+    FileSystems.setDefaultConfigInWorkers(TestPipeline.testingPipelineOptions());
+    ResourceIdTester.runResourceIdBattery(toResourceIdentifier("gs://bucket/foo/"));
+  }
+
   private GcsResourceId toResourceIdentifier(String str) throws Exception {
     return GcsResourceId.fromGcsPath(GcsPath.fromUri(str));
   }

http://git-wip-us.apache.org/repos/asf/beam/blob/a6a5ff7b/sdks/java/io/hadoop-file-system/pom.xml
----------------------------------------------------------------------
diff --git a/sdks/java/io/hadoop-file-system/pom.xml b/sdks/java/io/hadoop-file-system/pom.xml
index 423237b..db5a1db 100644
--- a/sdks/java/io/hadoop-file-system/pom.xml
+++ b/sdks/java/io/hadoop-file-system/pom.xml
@@ -157,6 +157,19 @@
     </dependency>
 
     <dependency>
+      <groupId>org.apache.beam</groupId>
+      <artifactId>beam-sdks-java-core</artifactId>
+      <classifier>tests</classifier>
+      <scope>test</scope>
+    </dependency>
+
+    <dependency>
+      <groupId>com.google.guava</groupId>
+      <artifactId>guava-testlib</artifactId>
+      <scope>test</scope>
+    </dependency>
+
+    <dependency>
       <groupId>org.hamcrest</groupId>
       <artifactId>hamcrest-all</artifactId>
       <scope>test</scope>

http://git-wip-us.apache.org/repos/asf/beam/blob/a6a5ff7b/sdks/java/io/hadoop-file-system/src/test/java/org/apache/beam/sdk/io/hdfs/HadoopResourceIdTest.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/hadoop-file-system/src/test/java/org/apache/beam/sdk/io/hdfs/HadoopResourceIdTest.java b/sdks/java/io/hadoop-file-system/src/test/java/org/apache/beam/sdk/io/hdfs/HadoopResourceIdTest.java
new file mode 100644
index 0000000..b0d821b
--- /dev/null
+++ b/sdks/java/io/hadoop-file-system/src/test/java/org/apache/beam/sdk/io/hdfs/HadoopResourceIdTest.java
@@ -0,0 +1,63 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.sdk.io.hdfs;
+
+import java.net.URI;
+import org.apache.beam.sdk.io.FileSystems;
+import org.apache.beam.sdk.io.fs.ResourceIdTester;
+import org.apache.beam.sdk.testing.TestPipeline;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.TemporaryFolder;
+
+/**
+ * Tests for {@link HadoopResourceId}.
+ */
+public class HadoopResourceIdTest {
+  private Configuration configuration;
+  private MiniDFSCluster hdfsCluster;
+  private URI hdfsClusterBaseUri;
+  private HadoopFileSystem fileSystem;
+  @Rule
+  public TemporaryFolder tmpFolder = new TemporaryFolder();
+
+  @Before
+  public void setUp() throws Exception {
+    configuration = new Configuration();
+    configuration.set(MiniDFSCluster.HDFS_MINIDFS_BASEDIR, tmpFolder.getRoot().getAbsolutePath());
+    MiniDFSCluster.Builder builder = new MiniDFSCluster.Builder(configuration);
+    hdfsCluster = builder.build();
+    hdfsClusterBaseUri = new URI(configuration.get("fs.defaultFS") + "/");
+    fileSystem = new HadoopFileSystem(configuration);
+  }
+
+  @After
+  public void tearDown() throws Exception {
+    hdfsCluster.shutdown();
+  }
+
+  @Test
+  public void testResourceIdTester() throws Exception {
+    FileSystems.setDefaultConfigInWorkers(TestPipeline.testingPipelineOptions());
+    ResourceIdTester.runResourceIdBattery(new HadoopResourceId(hdfsClusterBaseUri));
+  }
+}