You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@beam.apache.org by lc...@apache.org on 2017/04/20 21:23:23 UTC

[1/2] beam git commit: [BEAM-1871] Move over GcsPath to gcp-core

Repository: beam
Updated Branches:
  refs/heads/master 36a12d023 -> 33078d20c


[BEAM-1871] Move over GcsPath to gcp-core


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/1a3f350d
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/1a3f350d
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/1a3f350d

Branch: refs/heads/master
Commit: 1a3f350d5191e2bbfca980aec2d11e123cd313a0
Parents: 36a12d0
Author: Luke Cwik <lc...@google.com>
Authored: Thu Apr 20 12:26:21 2017 -0700
Committer: Luke Cwik <lc...@google.com>
Committed: Thu Apr 20 14:22:35 2017 -0700

----------------------------------------------------------------------
 sdks/java/core/pom.xml                          |   5 -
 .../org/apache/beam/sdk/util/gcsfs/GcsPath.java | 626 -------------------
 .../beam/sdk/util/gcsfs/package-info.java       |  20 -
 .../org/apache/beam/SdkCoreApiSurfaceTest.java  |   1 -
 .../apache/beam/sdk/util/gcsfs/GcsPathTest.java | 358 -----------
 sdks/java/extensions/gcp-core/pom.xml           |   5 -
 .../org/apache/beam/sdk/util/gcsfs/GcsPath.java | 626 +++++++++++++++++++
 .../beam/sdk/util/gcsfs/package-info.java       |  20 +
 .../apache/beam/sdk/util/gcsfs/GcsPathTest.java | 358 +++++++++++
 9 files changed, 1004 insertions(+), 1015 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/beam/blob/1a3f350d/sdks/java/core/pom.xml
----------------------------------------------------------------------
diff --git a/sdks/java/core/pom.xml b/sdks/java/core/pom.xml
index 930632d..ea4b725 100644
--- a/sdks/java/core/pom.xml
+++ b/sdks/java/core/pom.xml
@@ -140,11 +140,6 @@
     </dependency>
 
     <dependency>
-      <groupId>com.google.apis</groupId>
-      <artifactId>google-api-services-storage</artifactId>
-    </dependency>
-
-    <dependency>
       <groupId>com.google.http-client</groupId>
       <artifactId>google-http-client</artifactId>
     </dependency>

http://git-wip-us.apache.org/repos/asf/beam/blob/1a3f350d/sdks/java/core/src/main/java/org/apache/beam/sdk/util/gcsfs/GcsPath.java
----------------------------------------------------------------------
diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/util/gcsfs/GcsPath.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/util/gcsfs/GcsPath.java
deleted file mode 100644
index 863b01b..0000000
--- a/sdks/java/core/src/main/java/org/apache/beam/sdk/util/gcsfs/GcsPath.java
+++ /dev/null
@@ -1,626 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.beam.sdk.util.gcsfs;
-
-import static com.google.common.base.Preconditions.checkArgument;
-import static com.google.common.base.Strings.isNullOrEmpty;
-
-import com.google.api.services.storage.model.StorageObject;
-import java.io.File;
-import java.io.IOException;
-import java.net.URI;
-import java.net.URISyntaxException;
-import java.nio.file.FileSystem;
-import java.nio.file.LinkOption;
-import java.nio.file.Path;
-import java.nio.file.WatchEvent;
-import java.nio.file.WatchKey;
-import java.nio.file.WatchService;
-import java.util.Iterator;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
-import javax.annotation.Nonnull;
-import javax.annotation.Nullable;
-
-/**
- * Implements the Java NIO {@link Path} API for Google Cloud Storage paths.
- *
- * <p>GcsPath uses a slash ('/') as a directory separator.  Below is
- * a summary of how slashes are treated:
- * <ul>
- *   <li> A GCS bucket may not contain a slash.  An object may contain zero or
- *        more slashes.
- *   <li> A trailing slash always indicates a directory, which is compliant
- *        with POSIX.1-2008.
- *   <li> Slashes separate components of a path.  Empty components are allowed,
- *        these are represented as repeated slashes.  An empty component always
- *        refers to a directory, and always ends in a slash.
- *   <li> {@link #getParent()} always returns a path ending in a slash, as the
- *        parent of a GcsPath is always a directory.
- *   <li> Use {@link #resolve(String)} to append elements to a GcsPath -- this
- *        applies the rules consistently and is highly recommended over any
- *        custom string concatenation.
- * </ul>
- *
- * <p>GcsPath treats all GCS objects and buckets as belonging to the same
- * filesystem, so the root of a GcsPath is the GcsPath bucket="", object="".
- *
- * <p>Relative paths are not associated with any bucket.  This matches common
- * treatment of Path in which relative paths can be constructed from one
- * filesystem and appended to another filesystem.
- *
- * @see <a href=
- * "http://docs.oracle.com/javase/tutorial/essential/io/pathOps.html"
- * >Java Tutorials: Path Operations</a>
- */
-public class GcsPath implements Path {
-
-  public static final String SCHEME = "gs";
-
-  /**
-   * Creates a GcsPath from a URI.
-   *
-   * <p>The URI must be in the form {@code gs://[bucket]/[path]}, and may not
-   * contain a port, user info, a query, or a fragment.
-   */
-  public static GcsPath fromUri(URI uri) {
-    checkArgument(uri.getScheme().equalsIgnoreCase(SCHEME), "URI: %s is not a GCS URI", uri);
-    checkArgument(uri.getPort() == -1,
-        "GCS URI may not specify port: %s (%i)", uri, uri.getPort());
-    checkArgument(
-        isNullOrEmpty(uri.getUserInfo()),
-        "GCS URI may not specify userInfo: %s (%s)", uri, uri.getUserInfo());
-    checkArgument(
-        isNullOrEmpty(uri.getQuery()),
-        "GCS URI may not specify query: %s (%s)", uri, uri.getQuery());
-    checkArgument(
-        isNullOrEmpty(uri.getFragment()),
-        "GCS URI may not specify fragment: %s (%s)", uri, uri.getFragment());
-
-    return fromUri(uri.toString());
-  }
-
-  /**
-   * Pattern that is used to parse a GCS URL.
-   *
-   * <p>This is used to separate the components.  Verification is handled
-   * separately.
-   */
-  public static final Pattern GCS_URI =
-      Pattern.compile("(?<SCHEME>[^:]+)://(?<BUCKET>[^/]+)(/(?<OBJECT>.*))?");
-
-  /**
-   * Creates a GcsPath from a URI in string form.
-   *
-   * <p>This does not use URI parsing, which means it may accept patterns that
-   * the URI parser would not accept.
-   */
-  public static GcsPath fromUri(String uri) {
-    Matcher m = GCS_URI.matcher(uri);
-    checkArgument(m.matches(), "Invalid GCS URI: %s", uri);
-
-    checkArgument(m.group("SCHEME").equalsIgnoreCase(SCHEME),
-        "URI: %s is not a GCS URI", uri);
-    return new GcsPath(null, m.group("BUCKET"), m.group("OBJECT"));
-  }
-
-  /**
-   * Pattern that is used to parse a GCS resource name.
-   */
-  private static final Pattern GCS_RESOURCE_NAME =
-      Pattern.compile("storage.googleapis.com/(?<BUCKET>[^/]+)(/(?<OBJECT>.*))?");
-
-  /**
-   * Creates a GcsPath from a OnePlatform resource name in string form.
-   */
-  public static GcsPath fromResourceName(String name) {
-    Matcher m = GCS_RESOURCE_NAME.matcher(name);
-    checkArgument(m.matches(), "Invalid GCS resource name: %s", name);
-
-    return new GcsPath(null, m.group("BUCKET"), m.group("OBJECT"));
-  }
-
-  /**
-   * Creates a GcsPath from a {@linkplain StorageObject}.
-   */
-  public static GcsPath fromObject(StorageObject object) {
-    return new GcsPath(null, object.getBucket(), object.getName());
-  }
-
-  /**
-   * Creates a GcsPath from bucket and object components.
-   *
-   * <p>A GcsPath without a bucket name is treated as a relative path, which
-   * is a path component with no linkage to the root element.  This is similar
-   * to a Unix path that does not begin with the root marker (a slash).
-   * GCS has different naming constraints and APIs for working with buckets and
-   * objects, so these two concepts are kept separate to avoid accidental
-   * attempts to treat objects as buckets, or vice versa, as much as possible.
-   *
-   * <p>A GcsPath without an object name is a bucket reference.
-   * A bucket is always a directory, which could be used to lookup or add
-   * files to a bucket, but could not be opened as a file.
-   *
-   * <p>A GcsPath containing neither bucket nor object names is treated as
-   * the root of the GCS filesystem.  A listing on the root element would return
-   * the buckets available to the user.
-   *
-   * <p>If {@code null} is passed as either parameter, it is converted to an
-   * empty string internally for consistency.  There is no distinction between
-   * an empty string and a {@code null}, as neither are allowed by GCS.
-   *
-   * @param bucket a GCS bucket name, or none ({@code null} or an empty string)
-   *               if the object is not associated with a bucket
-   *               (e.g. relative paths or the root node).
-   * @param object a GCS object path, or none ({@code null} or an empty string)
-   *               for no object.
-   */
-  public static GcsPath fromComponents(@Nullable String bucket,
-                                       @Nullable String object) {
-    return new GcsPath(null, bucket, object);
-  }
-
-  @Nullable
-  private FileSystem fs;
-  @Nonnull
-  private final String bucket;
-  @Nonnull
-  private final String object;
-
-  /**
-   * Constructs a GcsPath.
-   *
-   * @param fs the associated FileSystem, if any
-   * @param bucket the associated bucket, or none ({@code null} or an empty
-   *               string) for a relative path component
-   * @param object the object, which is a fully-qualified object name if bucket
-   *               was also provided, or none ({@code null} or an empty string)
-   *               for no object
-   * @throws java.lang.IllegalArgumentException if the bucket or object names
-   *         are invalid.
-   */
-  public GcsPath(@Nullable FileSystem fs,
-                 @Nullable String bucket,
-                 @Nullable String object) {
-    if (bucket == null) {
-      bucket = "";
-    }
-    checkArgument(!bucket.contains("/"),
-        "GCS bucket may not contain a slash");
-    checkArgument(bucket.isEmpty()
-                || bucket.matches("[a-z0-9][-_a-z0-9.]+[a-z0-9]"),
-            "GCS bucket names must contain only lowercase letters, numbers, "
-                + "dashes (-), underscores (_), and dots (.). Bucket names "
-                + "must start and end with a number or letter. "
-                + "See https://developers.google.com/storage/docs/bucketnaming "
-                + "for more details.  Bucket name: " + bucket);
-
-    if (object == null) {
-      object = "";
-    }
-    checkArgument(
-        object.indexOf('\n') < 0 && object.indexOf('\r') < 0,
-        "GCS object names must not contain Carriage Return or "
-            + "Line Feed characters.");
-
-    this.fs = fs;
-    this.bucket = bucket;
-    this.object = object;
-  }
-
-  /**
-   * Returns the bucket name associated with this GCS path, or an empty string
-   * if this is a relative path component.
-   */
-  public String getBucket() {
-    return bucket;
-  }
-
-  /**
-   * Returns the object name associated with this GCS path, or an empty string
-   * if no object is specified.
-   */
-  public String getObject() {
-    return object;
-  }
-
-  public void setFileSystem(FileSystem fs) {
-    this.fs = fs;
-  }
-
-  @Override
-  public FileSystem getFileSystem() {
-    return fs;
-  }
-
-  // Absolute paths are those that have a bucket, plus the root path itself.
-  @Override
-  public boolean isAbsolute() {
-    return !bucket.isEmpty() || object.isEmpty();
-  }
-
-  @Override
-  public GcsPath getRoot() {
-    return new GcsPath(fs, "", "");
-  }
-
-  @Override
-  public GcsPath getFileName() {
-    int nameCount = getNameCount();
-    if (nameCount < 2) {
-      throw new UnsupportedOperationException(
-          "Can't get filename from root path in the bucket: " + this);
-    }
-    return getName(nameCount - 1);
-  }
-
-  /**
-   * Returns the <em>parent path</em>, or {@code null} if this path does not
-   * have a parent.
-   *
-   * <p>Returns a path that ends in '/', as the parent path always refers to
-   * a directory.
-   */
-  @Override
-  public GcsPath getParent() {
-    if (bucket.isEmpty() && object.isEmpty()) {
-      // The root path has no parent, by definition.
-      return null;
-    }
-
-    if (object.isEmpty()) {
-      // A GCS bucket. All buckets come from a common root.
-      return getRoot();
-    }
-
-    // Skip last character, in case it is a trailing slash.
-    int i = object.lastIndexOf('/', object.length() - 2);
-    if (i <= 0) {
-      if (bucket.isEmpty()) {
-        // Relative paths are not attached to the root node.
-        return null;
-      }
-      return new GcsPath(fs, bucket, "");
-    }
-
-    // Retain trailing slash.
-    return new GcsPath(fs, bucket, object.substring(0, i + 1));
-  }
-
-  @Override
-  public int getNameCount() {
-    int count = bucket.isEmpty() ? 0 : 1;
-    if (object.isEmpty()) {
-      return count;
-    }
-
-    // Add another for each separator found.
-    int index = -1;
-    while ((index = object.indexOf('/', index + 1)) != -1) {
-      count++;
-    }
-
-    return object.endsWith("/") ? count : count + 1;
-  }
-
-  @Override
-  public GcsPath getName(int count) {
-    checkArgument(count >= 0);
-
-    Iterator<Path> iterator = iterator();
-    for (int i = 0; i < count; ++i) {
-      checkArgument(iterator.hasNext());
-      iterator.next();
-    }
-
-    checkArgument(iterator.hasNext());
-    return (GcsPath) iterator.next();
-  }
-
-  @Override
-  public GcsPath subpath(int beginIndex, int endIndex) {
-    checkArgument(beginIndex >= 0);
-    checkArgument(endIndex > beginIndex);
-
-    Iterator<Path> iterator = iterator();
-    for (int i = 0; i < beginIndex; ++i) {
-      checkArgument(iterator.hasNext());
-      iterator.next();
-    }
-
-    GcsPath path = null;
-    while (beginIndex < endIndex) {
-      checkArgument(iterator.hasNext());
-      if (path == null) {
-        path = (GcsPath) iterator.next();
-      } else {
-        path = path.resolve(iterator.next());
-      }
-      ++beginIndex;
-    }
-
-    return path;
-  }
-
-  @Override
-  public boolean startsWith(Path other) {
-    if (other instanceof GcsPath) {
-      GcsPath gcsPath = (GcsPath) other;
-      return startsWith(gcsPath.bucketAndObject());
-    } else {
-      return startsWith(other.toString());
-    }
-  }
-
-  @Override
-  public boolean startsWith(String prefix) {
-    return bucketAndObject().startsWith(prefix);
-  }
-
-  @Override
-  public boolean endsWith(Path other) {
-    if (other instanceof GcsPath) {
-      GcsPath gcsPath = (GcsPath) other;
-      return endsWith(gcsPath.bucketAndObject());
-    } else {
-      return endsWith(other.toString());
-    }
-  }
-
-  @Override
-  public boolean endsWith(String suffix) {
-    return bucketAndObject().endsWith(suffix);
-  }
-
-  // TODO: support "." and ".." path components?
-  @Override
-  public GcsPath normalize() {
-    return this;
-  }
-
-  @Override
-  public GcsPath resolve(Path other) {
-    if (other instanceof GcsPath) {
-      GcsPath path = (GcsPath) other;
-      if (path.isAbsolute()) {
-        return path;
-      } else {
-        return resolve(path.getObject());
-      }
-    } else {
-      return resolve(other.toString());
-    }
-  }
-
-  @Override
-  public GcsPath resolve(String other) {
-    if (bucket.isEmpty() && object.isEmpty()) {
-      // Resolve on a root path is equivalent to looking up a bucket and object.
-      other = SCHEME + "://" + other;
-    }
-
-    if (other.startsWith(SCHEME + "://")) {
-      GcsPath path = GcsPath.fromUri(other);
-      path.setFileSystem(getFileSystem());
-      return path;
-    }
-
-    if (other.isEmpty()) {
-      // An empty component MUST refer to a directory.
-      other = "/";
-    }
-
-    if (object.isEmpty()) {
-      return new GcsPath(fs, bucket, other);
-    } else if (object.endsWith("/")) {
-      return new GcsPath(fs, bucket, object + other);
-    } else {
-      return new GcsPath(fs, bucket, object + "/" + other);
-    }
-  }
-
-  @Override
-  public Path resolveSibling(Path other) {
-    throw new UnsupportedOperationException();
-  }
-
-  @Override
-  public Path resolveSibling(String other) {
-    if (getNameCount() < 2) {
-      throw new UnsupportedOperationException("Can't resolve the sibling of a root path: " + this);
-    }
-    GcsPath parent = getParent();
-    return (parent == null) ? fromUri(other) : parent.resolve(other);
-  }
-
-  @Override
-  public Path relativize(Path other) {
-    throw new UnsupportedOperationException();
-  }
-
-  @Override
-  public GcsPath toAbsolutePath() {
-    return this;
-  }
-
-  @Override
-  public GcsPath toRealPath(LinkOption... options) throws IOException {
-    return this;
-  }
-
-  @Override
-  public File toFile() {
-    throw new UnsupportedOperationException();
-  }
-
-  @Override
-  public WatchKey register(WatchService watcher, WatchEvent.Kind<?>[] events,
-      WatchEvent.Modifier... modifiers) throws IOException {
-    throw new UnsupportedOperationException();
-  }
-
-  @Override
-  public WatchKey register(WatchService watcher, WatchEvent.Kind<?>... events)
-      throws IOException {
-    throw new UnsupportedOperationException();
-  }
-
-  @Override
-  public Iterator<Path> iterator() {
-    return new NameIterator(fs, !bucket.isEmpty(), bucketAndObject());
-  }
-
-  private static class NameIterator implements Iterator<Path> {
-    private final FileSystem fs;
-    private boolean fullPath;
-    private String name;
-
-    NameIterator(FileSystem fs, boolean fullPath, String name) {
-      this.fs = fs;
-      this.fullPath = fullPath;
-      this.name = name;
-    }
-
-    @Override
-    public boolean hasNext() {
-      return !isNullOrEmpty(name);
-    }
-
-    @Override
-    public GcsPath next() {
-      int i = name.indexOf('/');
-      String component;
-      if (i >= 0) {
-        component = name.substring(0, i);
-        name = name.substring(i + 1);
-      } else {
-        component = name;
-        name = null;
-      }
-      if (fullPath) {
-        fullPath = false;
-        return new GcsPath(fs, component, "");
-      } else {
-        // Relative paths have no bucket.
-        return new GcsPath(fs, "", component);
-      }
-    }
-
-    @Override
-    public void remove() {
-      throw new UnsupportedOperationException();
-    }
-  }
-
-  @Override
-  public int compareTo(Path other) {
-    if (!(other instanceof GcsPath)) {
-      throw new ClassCastException();
-    }
-
-    GcsPath path = (GcsPath) other;
-    int b = bucket.compareTo(path.bucket);
-    if (b != 0) {
-      return b;
-    }
-
-    // Compare a component at a time, so that the separator char doesn't
-    // get compared against component contents.  Eg, "a/b" < "a-1/b".
-    Iterator<Path> left = iterator();
-    Iterator<Path> right = path.iterator();
-
-    while (left.hasNext() && right.hasNext()) {
-      String leftStr = left.next().toString();
-      String rightStr = right.next().toString();
-      int c = leftStr.compareTo(rightStr);
-      if (c != 0) {
-        return c;
-      }
-    }
-
-    if (!left.hasNext() && !right.hasNext()) {
-      return 0;
-    } else {
-      return left.hasNext() ? 1 : -1;
-    }
-  }
-
-  @Override
-  public boolean equals(Object o) {
-    if (this == o) {
-      return true;
-    }
-    if (o == null || getClass() != o.getClass()) {
-      return false;
-    }
-
-    GcsPath paths = (GcsPath) o;
-    return bucket.equals(paths.bucket) && object.equals(paths.object);
-  }
-
-  @Override
-  public int hashCode() {
-    int result = bucket.hashCode();
-    result = 31 * result + object.hashCode();
-    return result;
-  }
-
-  @Override
-  public String toString() {
-    if (!isAbsolute()) {
-      return object;
-    }
-    StringBuilder sb = new StringBuilder();
-    sb.append(SCHEME)
-        .append("://");
-    if (!bucket.isEmpty()) {
-      sb.append(bucket)
-          .append('/');
-    }
-    sb.append(object);
-    return sb.toString();
-  }
-
-  // TODO: Consider using resource names for all GCS paths used by the SDK.
-  public String toResourceName() {
-    StringBuilder sb = new StringBuilder();
-    sb.append("storage.googleapis.com/");
-    if (!bucket.isEmpty()) {
-      sb.append(bucket).append('/');
-    }
-    sb.append(object);
-    return sb.toString();
-  }
-
-  @Override
-  public URI toUri() {
-    try {
-      return new URI(SCHEME, "//" + bucketAndObject(), null);
-    } catch (URISyntaxException e) {
-      throw new RuntimeException("Unable to create URI for GCS path " + this);
-    }
-  }
-
-  private String bucketAndObject() {
-    if (bucket.isEmpty()) {
-      return object;
-    } else {
-      return bucket + "/" + object;
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/beam/blob/1a3f350d/sdks/java/core/src/main/java/org/apache/beam/sdk/util/gcsfs/package-info.java
----------------------------------------------------------------------
diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/util/gcsfs/package-info.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/util/gcsfs/package-info.java
deleted file mode 100644
index 4d49f8c..0000000
--- a/sdks/java/core/src/main/java/org/apache/beam/sdk/util/gcsfs/package-info.java
+++ /dev/null
@@ -1,20 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/** Defines utilities used to interact with Google Cloud Storage. */
-package org.apache.beam.sdk.util.gcsfs;

http://git-wip-us.apache.org/repos/asf/beam/blob/1a3f350d/sdks/java/core/src/test/java/org/apache/beam/SdkCoreApiSurfaceTest.java
----------------------------------------------------------------------
diff --git a/sdks/java/core/src/test/java/org/apache/beam/SdkCoreApiSurfaceTest.java b/sdks/java/core/src/test/java/org/apache/beam/SdkCoreApiSurfaceTest.java
index b6e9205..153bd84 100644
--- a/sdks/java/core/src/test/java/org/apache/beam/SdkCoreApiSurfaceTest.java
+++ b/sdks/java/core/src/test/java/org/apache/beam/SdkCoreApiSurfaceTest.java
@@ -39,7 +39,6 @@ public class SdkCoreApiSurfaceTest {
         ImmutableSet.of(
             "org.apache.beam",
             "com.google.api.client",
-            "com.google.api.services.storage",
             "com.google.protobuf",
             "com.fasterxml.jackson.annotation",
             "com.fasterxml.jackson.core",

http://git-wip-us.apache.org/repos/asf/beam/blob/1a3f350d/sdks/java/core/src/test/java/org/apache/beam/sdk/util/gcsfs/GcsPathTest.java
----------------------------------------------------------------------
diff --git a/sdks/java/core/src/test/java/org/apache/beam/sdk/util/gcsfs/GcsPathTest.java b/sdks/java/core/src/test/java/org/apache/beam/sdk/util/gcsfs/GcsPathTest.java
deleted file mode 100644
index 426fb16..0000000
--- a/sdks/java/core/src/test/java/org/apache/beam/sdk/util/gcsfs/GcsPathTest.java
+++ /dev/null
@@ -1,358 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.beam.sdk.util.gcsfs;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertFalse;
-import static org.junit.Assert.assertNull;
-import static org.junit.Assert.assertThat;
-import static org.junit.Assert.assertTrue;
-
-import java.net.URI;
-import java.nio.file.Path;
-import java.nio.file.Paths;
-import java.util.Arrays;
-import java.util.Iterator;
-import java.util.List;
-import org.hamcrest.Matchers;
-import org.junit.Assert;
-import org.junit.Rule;
-import org.junit.Test;
-import org.junit.rules.ExpectedException;
-import org.junit.runner.RunWith;
-import org.junit.runners.JUnit4;
-
-/**
- * Tests of GcsPath.
- */
-@RunWith(JUnit4.class)
-public class GcsPathTest {
-
-  /**
-   * Test case, which tests parsing and building of GcsPaths.
-   */
-  static final class TestCase {
-
-    final String uri;
-    final String expectedBucket;
-    final String expectedObject;
-    final String[] namedComponents;
-
-    TestCase(String uri, String... namedComponents) {
-      this.uri = uri;
-      this.expectedBucket = namedComponents[0];
-      this.namedComponents = namedComponents;
-      this.expectedObject = uri.substring(expectedBucket.length() + 6);
-    }
-  }
-
-  // Each test case is an expected URL, then the components used to build it.
-  // Empty components result in a double slash.
-  static final List<TestCase> PATH_TEST_CASES = Arrays.asList(
-      new TestCase("gs://bucket/then/object", "bucket", "then", "object"),
-      new TestCase("gs://bucket//then/object", "bucket", "", "then", "object"),
-      new TestCase("gs://bucket/then//object", "bucket", "then", "", "object"),
-      new TestCase("gs://bucket/then///object", "bucket", "then", "", "", "object"),
-      new TestCase("gs://bucket/then/object/", "bucket", "then", "object/"),
-      new TestCase("gs://bucket/then/object/", "bucket", "then/", "object/"),
-      new TestCase("gs://bucket/then/object//", "bucket", "then", "object", ""),
-      new TestCase("gs://bucket/then/object//", "bucket", "then", "object/", ""),
-      new TestCase("gs://bucket/", "bucket")
-  );
-
-  @Rule
-  public ExpectedException thrown = ExpectedException.none();
-
-  @Test
-  public void testGcsPathParsing() throws Exception {
-    for (TestCase testCase : PATH_TEST_CASES) {
-      String uriString = testCase.uri;
-
-      GcsPath path = GcsPath.fromUri(URI.create(uriString));
-      // Deconstruction - check bucket, object, and components.
-      assertEquals(testCase.expectedBucket, path.getBucket());
-      assertEquals(testCase.expectedObject, path.getObject());
-      assertEquals(testCase.uri,
-          testCase.namedComponents.length, path.getNameCount());
-
-      // Construction - check that the path can be built from components.
-      GcsPath built = GcsPath.fromComponents(null, null);
-      for (String component : testCase.namedComponents) {
-        built = built.resolve(component);
-      }
-      assertEquals(testCase.uri, built.toString());
-    }
-  }
-
-  @Test
-  public void testParentRelationship() throws Exception {
-    GcsPath path = GcsPath.fromComponents("bucket", "then/object");
-    assertEquals("bucket", path.getBucket());
-    assertEquals("then/object", path.getObject());
-    assertEquals(3, path.getNameCount());
-    assertTrue(path.endsWith("object"));
-    assertTrue(path.startsWith("bucket/then"));
-
-    GcsPath parent = path.getParent();  // gs://bucket/then/
-    assertEquals("bucket", parent.getBucket());
-    assertEquals("then/", parent.getObject());
-    assertEquals(2, parent.getNameCount());
-    assertThat(path, Matchers.not(Matchers.equalTo(parent)));
-    assertTrue(path.startsWith(parent));
-    assertFalse(parent.startsWith(path));
-    assertTrue(parent.endsWith("then/"));
-    assertTrue(parent.startsWith("bucket/then"));
-    assertTrue(parent.isAbsolute());
-
-    GcsPath root = path.getRoot();
-    assertEquals(0, root.getNameCount());
-    assertEquals("gs://", root.toString());
-    assertEquals("", root.getBucket());
-    assertEquals("", root.getObject());
-    assertTrue(root.isAbsolute());
-    assertThat(root, Matchers.equalTo(parent.getRoot()));
-
-    GcsPath grandParent = parent.getParent();  // gs://bucket/
-    assertEquals(1, grandParent.getNameCount());
-    assertEquals("gs://bucket/", grandParent.toString());
-    assertTrue(grandParent.isAbsolute());
-    assertThat(root, Matchers.equalTo(grandParent.getParent()));
-    assertThat(root.getParent(), Matchers.nullValue());
-
-    assertTrue(path.startsWith(path.getRoot()));
-    assertTrue(parent.startsWith(path.getRoot()));
-  }
-
-  @Test
-  public void testRelativeParent() throws Exception {
-    GcsPath path = GcsPath.fromComponents(null, "a/b");
-    GcsPath parent = path.getParent();
-    assertEquals("a/", parent.toString());
-
-    GcsPath grandParent = parent.getParent();
-    assertNull(grandParent);
-  }
-
-  @Test
-  public void testUriSupport() throws Exception {
-    URI uri = URI.create("gs://bucket/some/path");
-
-    GcsPath path = GcsPath.fromUri(uri);
-    assertEquals("bucket", path.getBucket());
-    assertEquals("some/path", path.getObject());
-
-    URI reconstructed = path.toUri();
-    assertEquals(uri, reconstructed);
-
-    path = GcsPath.fromUri("gs://bucket");
-    assertEquals("gs://bucket/", path.toString());
-  }
-
-  @Test
-  public void testBucketParsing() throws Exception {
-    GcsPath path = GcsPath.fromUri("gs://bucket");
-    GcsPath path2 = GcsPath.fromUri("gs://bucket/");
-
-    assertEquals(path, path2);
-    assertEquals(path.toString(), path2.toString());
-    assertEquals(path.toUri(), path2.toUri());
-  }
-
-  @Test
-  public void testGcsPathToString() throws Exception {
-    String filename = "gs://some_bucket/some/file.txt";
-    GcsPath path = GcsPath.fromUri(filename);
-    assertEquals(filename, path.toString());
-  }
-
-  @Test
-  public void testEquals() {
-    GcsPath a = GcsPath.fromComponents(null, "a/b/c");
-    GcsPath a2 = GcsPath.fromComponents(null, "a/b/c");
-    assertFalse(a.isAbsolute());
-    assertFalse(a2.isAbsolute());
-
-    GcsPath b = GcsPath.fromComponents("bucket", "a/b/c");
-    GcsPath b2 = GcsPath.fromComponents("bucket", "a/b/c");
-    assertTrue(b.isAbsolute());
-    assertTrue(b2.isAbsolute());
-
-    assertEquals(a, a);
-    assertThat(a, Matchers.not(Matchers.equalTo(b)));
-    assertThat(b, Matchers.not(Matchers.equalTo(a)));
-
-    assertEquals(a, a2);
-    assertEquals(a2, a);
-    assertEquals(b, b2);
-    assertEquals(b2, b);
-
-    assertThat(a, Matchers.not(Matchers.equalTo(Paths.get("/tmp/foo"))));
-    assertTrue(a != null);
-  }
-
-  @Test(expected = IllegalArgumentException.class)
-  public void testInvalidGcsPath() {
-    @SuppressWarnings("unused")
-    GcsPath filename =
-        GcsPath.fromUri("file://invalid/gcs/path");
-  }
-
-  @Test(expected = IllegalArgumentException.class)
-  public void testInvalidBucket() {
-    GcsPath.fromComponents("invalid/", "");
-  }
-
-  @Test(expected = IllegalArgumentException.class)
-  public void testInvalidObject_newline() {
-    GcsPath.fromComponents(null, "a\nb");
-  }
-
-  @Test(expected = IllegalArgumentException.class)
-  public void testInvalidObject_cr() {
-    GcsPath.fromComponents(null, "a\rb");
-  }
-
-  @Test
-  public void testResolveUri() {
-    GcsPath path = GcsPath.fromComponents("bucket", "a/b/c");
-    GcsPath d = path.resolve("gs://bucket2/d");
-    assertEquals("gs://bucket2/d", d.toString());
-  }
-
-  @Test
-  public void testResolveOther() {
-    GcsPath a = GcsPath.fromComponents("bucket", "a");
-    GcsPath b = a.resolve(Paths.get("b"));
-    assertEquals("a/b", b.getObject());
-  }
-
-  @Test
-  public void testGetFileName() {
-    assertEquals("foo", GcsPath.fromUri("gs://bucket/bar/foo").getFileName().toString());
-    assertEquals("foo", GcsPath.fromUri("gs://bucket/foo").getFileName().toString());
-    thrown.expect(UnsupportedOperationException.class);
-    GcsPath.fromUri("gs://bucket/").getFileName();
-  }
-
-  @Test
-  public void testResolveSibling() {
-    assertEquals(
-        "gs://bucket/bar/moo",
-        GcsPath.fromUri("gs://bucket/bar/foo").resolveSibling("moo").toString());
-    assertEquals(
-        "gs://bucket/moo",
-        GcsPath.fromUri("gs://bucket/foo").resolveSibling("moo").toString());
-    thrown.expect(UnsupportedOperationException.class);
-    GcsPath.fromUri("gs://bucket/").resolveSibling("moo");
-  }
-
-  @Test
-  public void testCompareTo() {
-    GcsPath a = GcsPath.fromComponents("bucket", "a");
-    GcsPath b = GcsPath.fromComponents("bucket", "b");
-    GcsPath b2 = GcsPath.fromComponents("bucket2", "b");
-    GcsPath brel = GcsPath.fromComponents(null, "b");
-    GcsPath a2 = GcsPath.fromComponents("bucket", "a");
-    GcsPath arel = GcsPath.fromComponents(null, "a");
-
-    assertThat(a.compareTo(b), Matchers.lessThan(0));
-    assertThat(b.compareTo(a), Matchers.greaterThan(0));
-    assertThat(a.compareTo(a2), Matchers.equalTo(0));
-
-    assertThat(a.hashCode(), Matchers.equalTo(a2.hashCode()));
-    assertThat(a.hashCode(), Matchers.not(Matchers.equalTo(b.hashCode())));
-    assertThat(b.hashCode(), Matchers.not(Matchers.equalTo(brel.hashCode())));
-
-    assertThat(brel.compareTo(b), Matchers.lessThan(0));
-    assertThat(b.compareTo(brel), Matchers.greaterThan(0));
-    assertThat(arel.compareTo(brel), Matchers.lessThan(0));
-    assertThat(brel.compareTo(arel), Matchers.greaterThan(0));
-
-    assertThat(b.compareTo(b2), Matchers.lessThan(0));
-    assertThat(b2.compareTo(b), Matchers.greaterThan(0));
-  }
-
-  @Test
-  public void testCompareTo_ordering() {
-    GcsPath ab = GcsPath.fromComponents("bucket", "a/b");
-    GcsPath abc = GcsPath.fromComponents("bucket", "a/b/c");
-    GcsPath a1b = GcsPath.fromComponents("bucket", "a-1/b");
-
-    assertThat(ab.compareTo(a1b), Matchers.lessThan(0));
-    assertThat(a1b.compareTo(ab), Matchers.greaterThan(0));
-
-    assertThat(ab.compareTo(abc), Matchers.lessThan(0));
-    assertThat(abc.compareTo(ab), Matchers.greaterThan(0));
-  }
-
-  @Test
-  public void testCompareTo_buckets() {
-    GcsPath a = GcsPath.fromComponents(null, "a/b/c");
-    GcsPath b = GcsPath.fromComponents("bucket", "a/b/c");
-
-    assertThat(a.compareTo(b), Matchers.lessThan(0));
-    assertThat(b.compareTo(a), Matchers.greaterThan(0));
-  }
-
-  @Test
-  public void testIterator() {
-    GcsPath a = GcsPath.fromComponents("bucket", "a/b/c");
-    Iterator<Path> it = a.iterator();
-
-    assertTrue(it.hasNext());
-    assertEquals("gs://bucket/", it.next().toString());
-    assertTrue(it.hasNext());
-    assertEquals("a", it.next().toString());
-    assertTrue(it.hasNext());
-    assertEquals("b", it.next().toString());
-    assertTrue(it.hasNext());
-    assertEquals("c", it.next().toString());
-    assertFalse(it.hasNext());
-  }
-
-  @Test
-  public void testSubpath() {
-    GcsPath a = GcsPath.fromComponents("bucket", "a/b/c/d");
-    assertThat(a.subpath(0, 1).toString(), Matchers.equalTo("gs://bucket/"));
-    assertThat(a.subpath(0, 2).toString(), Matchers.equalTo("gs://bucket/a"));
-    assertThat(a.subpath(0, 3).toString(), Matchers.equalTo("gs://bucket/a/b"));
-    assertThat(a.subpath(0, 4).toString(), Matchers.equalTo("gs://bucket/a/b/c"));
-    assertThat(a.subpath(1, 2).toString(), Matchers.equalTo("a"));
-    assertThat(a.subpath(2, 3).toString(), Matchers.equalTo("b"));
-    assertThat(a.subpath(2, 4).toString(), Matchers.equalTo("b/c"));
-    assertThat(a.subpath(2, 5).toString(), Matchers.equalTo("b/c/d"));
-  }
-
-  @Test
-  public void testGetName() {
-    GcsPath a = GcsPath.fromComponents("bucket", "a/b/c/d");
-    assertEquals(5, a.getNameCount());
-    assertThat(a.getName(0).toString(), Matchers.equalTo("gs://bucket/"));
-    assertThat(a.getName(1).toString(), Matchers.equalTo("a"));
-    assertThat(a.getName(2).toString(), Matchers.equalTo("b"));
-    assertThat(a.getName(3).toString(), Matchers.equalTo("c"));
-    assertThat(a.getName(4).toString(), Matchers.equalTo("d"));
-  }
-
-  @Test(expected = IllegalArgumentException.class)
-  public void testSubPathError() {
-    GcsPath a = GcsPath.fromComponents("bucket", "a/b/c/d");
-    a.subpath(1, 1); // throws IllegalArgumentException
-    Assert.fail();
-  }
-}

http://git-wip-us.apache.org/repos/asf/beam/blob/1a3f350d/sdks/java/extensions/gcp-core/pom.xml
----------------------------------------------------------------------
diff --git a/sdks/java/extensions/gcp-core/pom.xml b/sdks/java/extensions/gcp-core/pom.xml
index 918632a..d566f94 100644
--- a/sdks/java/extensions/gcp-core/pom.xml
+++ b/sdks/java/extensions/gcp-core/pom.xml
@@ -73,11 +73,6 @@
     </dependency>
 
     <dependency>
-      <groupId>com.google.auth</groupId>
-      <artifactId>google-auth-library-credentials</artifactId>
-    </dependency>
-
-    <dependency>
       <groupId>com.google.http-client</groupId>
       <artifactId>google-http-client-jackson2</artifactId>
     </dependency>

http://git-wip-us.apache.org/repos/asf/beam/blob/1a3f350d/sdks/java/extensions/gcp-core/src/main/java/org/apache/beam/sdk/util/gcsfs/GcsPath.java
----------------------------------------------------------------------
diff --git a/sdks/java/extensions/gcp-core/src/main/java/org/apache/beam/sdk/util/gcsfs/GcsPath.java b/sdks/java/extensions/gcp-core/src/main/java/org/apache/beam/sdk/util/gcsfs/GcsPath.java
new file mode 100644
index 0000000..863b01b
--- /dev/null
+++ b/sdks/java/extensions/gcp-core/src/main/java/org/apache/beam/sdk/util/gcsfs/GcsPath.java
@@ -0,0 +1,626 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.sdk.util.gcsfs;
+
+import static com.google.common.base.Preconditions.checkArgument;
+import static com.google.common.base.Strings.isNullOrEmpty;
+
+import com.google.api.services.storage.model.StorageObject;
+import java.io.File;
+import java.io.IOException;
+import java.net.URI;
+import java.net.URISyntaxException;
+import java.nio.file.FileSystem;
+import java.nio.file.LinkOption;
+import java.nio.file.Path;
+import java.nio.file.WatchEvent;
+import java.nio.file.WatchKey;
+import java.nio.file.WatchService;
+import java.util.Iterator;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+import javax.annotation.Nonnull;
+import javax.annotation.Nullable;
+
+/**
+ * Implements the Java NIO {@link Path} API for Google Cloud Storage paths.
+ *
+ * <p>GcsPath uses a slash ('/') as a directory separator.  Below is
+ * a summary of how slashes are treated:
+ * <ul>
+ *   <li> A GCS bucket may not contain a slash.  An object may contain zero or
+ *        more slashes.
+ *   <li> A trailing slash always indicates a directory, which is compliant
+ *        with POSIX.1-2008.
+ *   <li> Slashes separate components of a path.  Empty components are allowed,
+ *        these are represented as repeated slashes.  An empty component always
+ *        refers to a directory, and always ends in a slash.
+ *   <li> {@link #getParent()} always returns a path ending in a slash, as the
+ *        parent of a GcsPath is always a directory.
+ *   <li> Use {@link #resolve(String)} to append elements to a GcsPath -- this
+ *        applies the rules consistently and is highly recommended over any
+ *        custom string concatenation.
+ * </ul>
+ *
+ * <p>GcsPath treats all GCS objects and buckets as belonging to the same
+ * filesystem, so the root of a GcsPath is the GcsPath bucket="", object="".
+ *
+ * <p>Relative paths are not associated with any bucket.  This matches common
+ * treatment of Path in which relative paths can be constructed from one
+ * filesystem and appended to another filesystem.
+ *
+ * @see <a href=
+ * "http://docs.oracle.com/javase/tutorial/essential/io/pathOps.html"
+ * >Java Tutorials: Path Operations</a>
+ */
+public class GcsPath implements Path {
+
+  public static final String SCHEME = "gs";
+
+  /**
+   * Creates a GcsPath from a URI of the form {@code gs://[bucket]/[path]}.
+   *
+   * @throws IllegalArgumentException if the scheme is missing or is not {@code gs}, or
+   *     if the URI specifies a port, user info, a query, or a fragment.
+   */
+  public static GcsPath fromUri(URI uri) {
+    // Compare constant-first: URI#getScheme() returns null for scheme-less URIs, which
+    // must be rejected as an invalid argument rather than fail with an NPE.
+    checkArgument(SCHEME.equalsIgnoreCase(uri.getScheme()), "URI: %s is not a GCS URI", uri);
+    // Guava's checkArgument only substitutes %s placeholders.
+    checkArgument(uri.getPort() == -1,
+        "GCS URI may not specify port: %s (%s)", uri, uri.getPort());
+    checkArgument(isNullOrEmpty(uri.getUserInfo()),
+        "GCS URI may not specify userInfo: %s (%s)", uri, uri.getUserInfo());
+    checkArgument(isNullOrEmpty(uri.getQuery()),
+        "GCS URI may not specify query: %s (%s)", uri, uri.getQuery());
+    checkArgument(isNullOrEmpty(uri.getFragment()),
+        "GCS URI may not specify fragment: %s (%s)", uri, uri.getFragment());
+
+    return fromUri(uri.toString());
+  }
+
+  /**
+   * Pattern that is used to parse a GCS URL.
+   *
+   * <p>This is used to separate the components.  Verification is handled
+   * separately.
+   */
+  public static final Pattern GCS_URI =
+      Pattern.compile("(?<SCHEME>[^:]+)://(?<BUCKET>[^/]+)(/(?<OBJECT>.*))?");
+
+  /**
+   * Creates a GcsPath from a URI in string form.
+   *
+   * <p>This does not use URI parsing, which means it may accept patterns that
+   * the URI parser would not accept.
+   */
+  public static GcsPath fromUri(String uri) {
+    Matcher m = GCS_URI.matcher(uri);
+    checkArgument(m.matches(), "Invalid GCS URI: %s", uri);
+
+    checkArgument(m.group("SCHEME").equalsIgnoreCase(SCHEME),
+        "URI: %s is not a GCS URI", uri);
+    return new GcsPath(null, m.group("BUCKET"), m.group("OBJECT"));
+  }
+
+  /**
+   * Pattern that is used to parse a GCS resource name.
+   */
+  private static final Pattern GCS_RESOURCE_NAME =
+      Pattern.compile("storage.googleapis.com/(?<BUCKET>[^/]+)(/(?<OBJECT>.*))?");
+
+  /**
+   * Creates a GcsPath from a OnePlatform resource name in string form.
+   */
+  public static GcsPath fromResourceName(String name) {
+    Matcher m = GCS_RESOURCE_NAME.matcher(name);
+    checkArgument(m.matches(), "Invalid GCS resource name: %s", name);
+
+    return new GcsPath(null, m.group("BUCKET"), m.group("OBJECT"));
+  }
+
+  /**
+   * Creates a GcsPath from a {@linkplain StorageObject}.
+   */
+  public static GcsPath fromObject(StorageObject object) {
+    return new GcsPath(null, object.getBucket(), object.getName());
+  }
+
+  /**
+   * Creates a GcsPath from bucket and object components.
+   *
+   * <p>A GcsPath without a bucket name is treated as a relative path, which
+   * is a path component with no linkage to the root element.  This is similar
+   * to a Unix path that does not begin with the root marker (a slash).
+   * GCS has different naming constraints and APIs for working with buckets and
+   * objects, so these two concepts are kept separate to avoid accidental
+   * attempts to treat objects as buckets, or vice versa, as much as possible.
+   *
+   * <p>A GcsPath without an object name is a bucket reference.
+   * A bucket is always a directory, which could be used to lookup or add
+   * files to a bucket, but could not be opened as a file.
+   *
+   * <p>A GcsPath containing neither bucket nor object names is treated as
+   * the root of the GCS filesystem.  A listing on the root element would return
+   * the buckets available to the user.
+   *
+   * <p>If {@code null} is passed as either parameter, it is converted to an
+   * empty string internally for consistency.  There is no distinction between
+   * an empty string and a {@code null}, as neither are allowed by GCS.
+   *
+   * @param bucket a GCS bucket name, or none ({@code null} or an empty string)
+   *               if the object is not associated with a bucket
+   *               (e.g. relative paths or the root node).
+   * @param object a GCS object path, or none ({@code null} or an empty string)
+   *               for no object.
+   */
+  public static GcsPath fromComponents(@Nullable String bucket,
+                                       @Nullable String object) {
+    return new GcsPath(null, bucket, object);
+  }
+
+  @Nullable
+  private FileSystem fs;
+  @Nonnull
+  private final String bucket;
+  @Nonnull
+  private final String object;
+
+  /**
+   * Constructs a GcsPath.
+   *
+   * @param fs the associated FileSystem, if any
+   * @param bucket the associated bucket, or none ({@code null} or an empty
+   *               string) for a relative path component
+   * @param object the object, which is a fully-qualified object name if bucket
+   *               was also provided, or none ({@code null} or an empty string)
+   *               for no object
+   * @throws java.lang.IllegalArgumentException if the bucket or object names
+   *         are invalid.
+   */
+  public GcsPath(@Nullable FileSystem fs,
+                 @Nullable String bucket,
+                 @Nullable String object) {
+    if (bucket == null) {
+      bucket = "";
+    }
+    checkArgument(!bucket.contains("/"), "GCS bucket may not contain a slash");
+    // The message is a template so the bucket name is only formatted on failure.
+    checkArgument(bucket.isEmpty()
+                || bucket.matches("[a-z0-9][-_a-z0-9.]+[a-z0-9]"),
+            "GCS bucket names must contain only lowercase letters, numbers, "
+                + "dashes (-), underscores (_), and dots (.). Bucket names "
+                + "must start and end with a number or letter. "
+                + "See https://developers.google.com/storage/docs/bucketnaming "
+                + "for more details.  Bucket name: %s", bucket);
+
+    if (object == null) {
+      object = "";
+    }
+    checkArgument(
+        object.indexOf('\n') < 0 && object.indexOf('\r') < 0,
+        "GCS object names must not contain Carriage Return or "
+            + "Line Feed characters.");
+
+    this.fs = fs;
+    this.bucket = bucket;
+    this.object = object;
+  }
+
+  /**
+   * Returns the bucket name associated with this GCS path, or an empty string
+   * if this is a relative path component.
+   */
+  public String getBucket() {
+    return bucket;
+  }
+
+  /**
+   * Returns the object name associated with this GCS path, or an empty string
+   * if no object is specified.
+   */
+  public String getObject() {
+    return object;
+  }
+
+  public void setFileSystem(FileSystem fs) {
+    this.fs = fs;
+  }
+
+  @Override
+  public FileSystem getFileSystem() {
+    return fs;
+  }
+
+  // A path is absolute if it has a bucket, or if it is the root path (no bucket, no object).
+  @Override
+  public boolean isAbsolute() {
+    return !bucket.isEmpty() || object.isEmpty();
+  }
+
+  @Override
+  public GcsPath getRoot() {
+    return new GcsPath(fs, "", "");
+  }
+
+  @Override
+  public GcsPath getFileName() {
+    int nameCount = getNameCount();
+    if (nameCount < 2) {
+      throw new UnsupportedOperationException(
+          "Can't get filename from root path in the bucket: " + this);
+    }
+    return getName(nameCount - 1);
+  }
+
+  /**
+   * Returns the <em>parent path</em>, or {@code null} if this path does not
+   * have a parent.
+   *
+   * <p>Returns a path that ends in '/', as the parent path always refers to
+   * a directory.
+   */
+  @Override
+  public GcsPath getParent() {
+    if (bucket.isEmpty() && object.isEmpty()) {
+      // The root path has no parent, by definition.
+      return null;
+    }
+
+    if (object.isEmpty()) {
+      // A GCS bucket. All buckets come from a common root.
+      return getRoot();
+    }
+
+    // Skip last character, in case it is a trailing slash.
+    int i = object.lastIndexOf('/', object.length() - 2);
+    if (i <= 0) {
+      if (bucket.isEmpty()) {
+        // Relative paths are not attached to the root node.
+        return null;
+      }
+      return new GcsPath(fs, bucket, "");
+    }
+
+    // Retain trailing slash.
+    return new GcsPath(fs, bucket, object.substring(0, i + 1));
+  }
+
+  @Override
+  public int getNameCount() {
+    int count = bucket.isEmpty() ? 0 : 1;
+    if (object.isEmpty()) {
+      return count;
+    }
+
+    // Add another for each separator found.
+    int index = -1;
+    while ((index = object.indexOf('/', index + 1)) != -1) {
+      count++;
+    }
+
+    return object.endsWith("/") ? count : count + 1;
+  }
+
+  @Override
+  public GcsPath getName(int count) {
+    checkArgument(count >= 0);
+
+    Iterator<Path> iterator = iterator();
+    for (int i = 0; i < count; ++i) {
+      checkArgument(iterator.hasNext());
+      iterator.next();
+    }
+
+    checkArgument(iterator.hasNext());
+    return (GcsPath) iterator.next();
+  }
+
+  @Override
+  public GcsPath subpath(int beginIndex, int endIndex) {
+    checkArgument(beginIndex >= 0);
+    checkArgument(endIndex > beginIndex);
+
+    Iterator<Path> iterator = iterator();
+    for (int i = 0; i < beginIndex; ++i) {
+      checkArgument(iterator.hasNext());
+      iterator.next();
+    }
+
+    GcsPath path = null;
+    while (beginIndex < endIndex) {
+      checkArgument(iterator.hasNext());
+      if (path == null) {
+        path = (GcsPath) iterator.next();
+      } else {
+        path = path.resolve(iterator.next());
+      }
+      ++beginIndex;
+    }
+
+    return path;
+  }
+
+  @Override
+  public boolean startsWith(Path other) {
+    if (other instanceof GcsPath) {
+      GcsPath gcsPath = (GcsPath) other;
+      return startsWith(gcsPath.bucketAndObject());
+    } else {
+      return startsWith(other.toString());
+    }
+  }
+
+  @Override
+  public boolean startsWith(String prefix) {
+    return bucketAndObject().startsWith(prefix);
+  }
+
+  @Override
+  public boolean endsWith(Path other) {
+    if (other instanceof GcsPath) {
+      GcsPath gcsPath = (GcsPath) other;
+      return endsWith(gcsPath.bucketAndObject());
+    } else {
+      return endsWith(other.toString());
+    }
+  }
+
+  @Override
+  public boolean endsWith(String suffix) {
+    return bucketAndObject().endsWith(suffix);
+  }
+
+  // TODO: support "." and ".." path components?
+  @Override
+  public GcsPath normalize() {
+    return this;
+  }
+
+  @Override
+  public GcsPath resolve(Path other) {
+    if (other instanceof GcsPath) {
+      GcsPath path = (GcsPath) other;
+      if (path.isAbsolute()) {
+        return path;
+      } else {
+        return resolve(path.getObject());
+      }
+    } else {
+      return resolve(other.toString());
+    }
+  }
+
+  @Override
+  public GcsPath resolve(String other) {
+    if (bucket.isEmpty() && object.isEmpty()) {
+      // Resolve on a root path is equivalent to looking up a bucket and object.
+      other = SCHEME + "://" + other;
+    }
+
+    if (other.startsWith(SCHEME + "://")) {
+      GcsPath path = GcsPath.fromUri(other);
+      path.setFileSystem(getFileSystem());
+      return path;
+    }
+
+    if (other.isEmpty()) {
+      // An empty component MUST refer to a directory.
+      other = "/";
+    }
+
+    if (object.isEmpty()) {
+      return new GcsPath(fs, bucket, other);
+    } else if (object.endsWith("/")) {
+      return new GcsPath(fs, bucket, object + other);
+    } else {
+      return new GcsPath(fs, bucket, object + "/" + other);
+    }
+  }
+
+  @Override
+  public Path resolveSibling(Path other) {
+    throw new UnsupportedOperationException();
+  }
+
+  @Override
+  public Path resolveSibling(String other) {
+    if (getNameCount() < 2) {
+      throw new UnsupportedOperationException("Can't resolve the sibling of a root path: " + this);
+    }
+    GcsPath parent = getParent();
+    return (parent == null) ? fromUri(other) : parent.resolve(other);
+  }
+
+  @Override
+  public Path relativize(Path other) {
+    throw new UnsupportedOperationException();
+  }
+
+  @Override
+  public GcsPath toAbsolutePath() {
+    return this;
+  }
+
+  @Override
+  public GcsPath toRealPath(LinkOption... options) throws IOException {
+    return this;
+  }
+
+  @Override
+  public File toFile() {
+    throw new UnsupportedOperationException();
+  }
+
+  @Override
+  public WatchKey register(WatchService watcher, WatchEvent.Kind<?>[] events,
+      WatchEvent.Modifier... modifiers) throws IOException {
+    throw new UnsupportedOperationException();
+  }
+
+  @Override
+  public WatchKey register(WatchService watcher, WatchEvent.Kind<?>... events)
+      throws IOException {
+    throw new UnsupportedOperationException();
+  }
+
+  @Override
+  public Iterator<Path> iterator() {
+    return new NameIterator(fs, !bucket.isEmpty(), bucketAndObject());
+  }
+
+  private static class NameIterator implements Iterator<Path> {
+    private final FileSystem fs;
+    private boolean fullPath;
+    private String name;
+
+    NameIterator(FileSystem fs, boolean fullPath, String name) {
+      this.fs = fs;
+      this.fullPath = fullPath;
+      this.name = name;
+    }
+
+    @Override
+    public boolean hasNext() {
+      return !isNullOrEmpty(name);
+    }
+
+    @Override
+    public GcsPath next() {
+      // Honor the Iterator contract instead of failing with an NPE once exhausted.
+      if (!hasNext()) {
+        throw new java.util.NoSuchElementException();
+      }
+      int i = name.indexOf('/');
+      String component = (i >= 0) ? name.substring(0, i) : name;
+      // Keep only what follows the separator; null marks the iterator as exhausted.
+      name = (i >= 0) ? name.substring(i + 1) : null;
+      if (fullPath) {
+        // Only the first component of a full path is returned as a bucket.
+        fullPath = false;
+        return new GcsPath(fs, component, "");
+      } else {
+        // Relative paths have no bucket.
+        return new GcsPath(fs, "", component);
+      }
+    }
+
+    @Override
+    public void remove() {
+      throw new UnsupportedOperationException();
+    }
+  }
+
+  @Override
+  public int compareTo(Path other) {
+    if (!(other instanceof GcsPath)) {
+      throw new ClassCastException();
+    }
+
+    GcsPath path = (GcsPath) other;
+    int b = bucket.compareTo(path.bucket);
+    if (b != 0) {
+      return b;
+    }
+
+    // Compare a component at a time, so that the separator char doesn't
+    // get compared against component contents.  Eg, "a/b" < "a-1/b".
+    Iterator<Path> left = iterator();
+    Iterator<Path> right = path.iterator();
+
+    while (left.hasNext() && right.hasNext()) {
+      String leftStr = left.next().toString();
+      String rightStr = right.next().toString();
+      int c = leftStr.compareTo(rightStr);
+      if (c != 0) {
+        return c;
+      }
+    }
+
+    if (!left.hasNext() && !right.hasNext()) {
+      return 0;
+    } else {
+      return left.hasNext() ? 1 : -1;
+    }
+  }
+
+  @Override
+  public boolean equals(Object o) {
+    if (this == o) {
+      return true;
+    }
+    if (o == null || getClass() != o.getClass()) {
+      return false;
+    }
+
+    GcsPath paths = (GcsPath) o;
+    return bucket.equals(paths.bucket) && object.equals(paths.object);
+  }
+
+  @Override
+  public int hashCode() {
+    int result = bucket.hashCode();
+    result = 31 * result + object.hashCode();
+    return result;
+  }
+
+  @Override
+  public String toString() {
+    if (!isAbsolute()) {
+      return object;
+    }
+    StringBuilder sb = new StringBuilder();
+    sb.append(SCHEME)
+        .append("://");
+    if (!bucket.isEmpty()) {
+      sb.append(bucket)
+          .append('/');
+    }
+    sb.append(object);
+    return sb.toString();
+  }
+
+  // TODO: Consider using resource names for all GCS paths used by the SDK.
+  public String toResourceName() {
+    StringBuilder sb = new StringBuilder();
+    sb.append("storage.googleapis.com/");
+    if (!bucket.isEmpty()) {
+      sb.append(bucket).append('/');
+    }
+    sb.append(object);
+    return sb.toString();
+  }
+
+  @Override
+  public URI toUri() {
+    try {
+      return new URI(SCHEME, "//" + bucketAndObject(), null);
+    } catch (URISyntaxException e) {
+      throw new RuntimeException("Unable to create URI for GCS path " + this, e); // keep cause
+    }
+  }
+
+  private String bucketAndObject() {
+    if (bucket.isEmpty()) {
+      return object;
+    } else {
+      return bucket + "/" + object;
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/beam/blob/1a3f350d/sdks/java/extensions/gcp-core/src/main/java/org/apache/beam/sdk/util/gcsfs/package-info.java
----------------------------------------------------------------------
diff --git a/sdks/java/extensions/gcp-core/src/main/java/org/apache/beam/sdk/util/gcsfs/package-info.java b/sdks/java/extensions/gcp-core/src/main/java/org/apache/beam/sdk/util/gcsfs/package-info.java
new file mode 100644
index 0000000..4d49f8c
--- /dev/null
+++ b/sdks/java/extensions/gcp-core/src/main/java/org/apache/beam/sdk/util/gcsfs/package-info.java
@@ -0,0 +1,20 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/** Defines utilities used to interact with Google Cloud Storage. */
+package org.apache.beam.sdk.util.gcsfs;

http://git-wip-us.apache.org/repos/asf/beam/blob/1a3f350d/sdks/java/extensions/gcp-core/src/test/java/org/apache/beam/sdk/util/gcsfs/GcsPathTest.java
----------------------------------------------------------------------
diff --git a/sdks/java/extensions/gcp-core/src/test/java/org/apache/beam/sdk/util/gcsfs/GcsPathTest.java b/sdks/java/extensions/gcp-core/src/test/java/org/apache/beam/sdk/util/gcsfs/GcsPathTest.java
new file mode 100644
index 0000000..426fb16
--- /dev/null
+++ b/sdks/java/extensions/gcp-core/src/test/java/org/apache/beam/sdk/util/gcsfs/GcsPathTest.java
@@ -0,0 +1,358 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.sdk.util.gcsfs;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertThat;
+import static org.junit.Assert.assertTrue;
+
+import java.net.URI;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.util.Arrays;
+import java.util.Iterator;
+import java.util.List;
+import org.hamcrest.Matchers;
+import org.junit.Assert;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.ExpectedException;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+
+/**
+ * Tests of GcsPath.
+ */
+@RunWith(JUnit4.class)
+public class GcsPathTest {
+
+  /**
+   * Test case, which tests parsing and building of GcsPaths.
+   */
+  static final class TestCase {
+
+    final String uri;
+    final String expectedBucket;
+    final String expectedObject;
+    final String[] namedComponents;
+
+    TestCase(String uri, String... namedComponents) {
+      this.uri = uri;
+      this.expectedBucket = namedComponents[0];
+      this.namedComponents = namedComponents;
+      this.expectedObject = uri.substring(expectedBucket.length() + 6);
+    }
+  }
+
+  // Each test case is an expected URL, then the components used to build it.
+  // Empty components result in a double slash.
+  static final List<TestCase> PATH_TEST_CASES = Arrays.asList(
+      new TestCase("gs://bucket/then/object", "bucket", "then", "object"),
+      new TestCase("gs://bucket//then/object", "bucket", "", "then", "object"),
+      new TestCase("gs://bucket/then//object", "bucket", "then", "", "object"),
+      new TestCase("gs://bucket/then///object", "bucket", "then", "", "", "object"),
+      new TestCase("gs://bucket/then/object/", "bucket", "then", "object/"),
+      new TestCase("gs://bucket/then/object/", "bucket", "then/", "object/"),
+      new TestCase("gs://bucket/then/object//", "bucket", "then", "object", ""),
+      new TestCase("gs://bucket/then/object//", "bucket", "then", "object/", ""),
+      new TestCase("gs://bucket/", "bucket")
+  );
+
+  @Rule
+  public ExpectedException thrown = ExpectedException.none();
+
+  @Test
+  public void testGcsPathParsing() throws Exception {
+    for (TestCase testCase : PATH_TEST_CASES) {
+      String uriString = testCase.uri;
+
+      GcsPath path = GcsPath.fromUri(URI.create(uriString));
+      // Deconstruction - check bucket, object, and components.
+      assertEquals(testCase.expectedBucket, path.getBucket());
+      assertEquals(testCase.expectedObject, path.getObject());
+      assertEquals(testCase.uri,
+          testCase.namedComponents.length, path.getNameCount());
+
+      // Construction - check that the path can be built from components.
+      GcsPath built = GcsPath.fromComponents(null, null);
+      for (String component : testCase.namedComponents) {
+        built = built.resolve(component);
+      }
+      assertEquals(testCase.uri, built.toString());
+    }
+  }
+
+  @Test
+  public void testParentRelationship() throws Exception {
+    GcsPath path = GcsPath.fromComponents("bucket", "then/object");
+    assertEquals("bucket", path.getBucket());
+    assertEquals("then/object", path.getObject());
+    assertEquals(3, path.getNameCount());
+    assertTrue(path.endsWith("object"));
+    assertTrue(path.startsWith("bucket/then"));
+
+    GcsPath parent = path.getParent();  // gs://bucket/then/
+    assertEquals("bucket", parent.getBucket());
+    assertEquals("then/", parent.getObject());
+    assertEquals(2, parent.getNameCount());
+    assertThat(path, Matchers.not(Matchers.equalTo(parent)));
+    assertTrue(path.startsWith(parent));
+    assertFalse(parent.startsWith(path));
+    assertTrue(parent.endsWith("then/"));
+    assertTrue(parent.startsWith("bucket/then"));
+    assertTrue(parent.isAbsolute());
+
+    GcsPath root = path.getRoot();
+    assertEquals(0, root.getNameCount());
+    assertEquals("gs://", root.toString());
+    assertEquals("", root.getBucket());
+    assertEquals("", root.getObject());
+    assertTrue(root.isAbsolute());
+    assertThat(root, Matchers.equalTo(parent.getRoot()));
+
+    GcsPath grandParent = parent.getParent();  // gs://bucket/
+    assertEquals(1, grandParent.getNameCount());
+    assertEquals("gs://bucket/", grandParent.toString());
+    assertTrue(grandParent.isAbsolute());
+    assertThat(root, Matchers.equalTo(grandParent.getParent()));
+    assertThat(root.getParent(), Matchers.nullValue());
+
+    assertTrue(path.startsWith(path.getRoot()));
+    assertTrue(parent.startsWith(path.getRoot()));
+  }
+
+  @Test
+  public void testRelativeParent() throws Exception {
+    GcsPath path = GcsPath.fromComponents(null, "a/b");
+    GcsPath parent = path.getParent();
+    assertEquals("a/", parent.toString());
+
+    GcsPath grandParent = parent.getParent();
+    assertNull(grandParent);
+  }
+
+  @Test
+  public void testUriSupport() throws Exception {
+    URI uri = URI.create("gs://bucket/some/path");
+
+    GcsPath path = GcsPath.fromUri(uri);
+    assertEquals("bucket", path.getBucket());
+    assertEquals("some/path", path.getObject());
+
+    URI reconstructed = path.toUri();
+    assertEquals(uri, reconstructed);
+
+    path = GcsPath.fromUri("gs://bucket");
+    assertEquals("gs://bucket/", path.toString());
+  }
+
+  @Test
+  public void testBucketParsing() throws Exception {
+    GcsPath path = GcsPath.fromUri("gs://bucket");
+    GcsPath path2 = GcsPath.fromUri("gs://bucket/");
+
+    assertEquals(path, path2);
+    assertEquals(path.toString(), path2.toString());
+    assertEquals(path.toUri(), path2.toUri());
+  }
+
+  @Test
+  public void testGcsPathToString() throws Exception {
+    String filename = "gs://some_bucket/some/file.txt";
+    GcsPath path = GcsPath.fromUri(filename);
+    assertEquals(filename, path.toString());
+  }
+
+  @Test
+  public void testEquals() {
+    GcsPath a = GcsPath.fromComponents(null, "a/b/c");
+    GcsPath a2 = GcsPath.fromComponents(null, "a/b/c");
+    assertFalse(a.isAbsolute());
+    assertFalse(a2.isAbsolute());
+
+    GcsPath b = GcsPath.fromComponents("bucket", "a/b/c");
+    GcsPath b2 = GcsPath.fromComponents("bucket", "a/b/c");
+    assertTrue(b.isAbsolute());
+    assertTrue(b2.isAbsolute());
+
+    assertEquals(a, a);
+    assertThat(a, Matchers.not(Matchers.equalTo(b)));
+    assertThat(b, Matchers.not(Matchers.equalTo(a)));
+
+    assertEquals(a, a2);
+    assertEquals(a2, a);
+    assertEquals(b, b2);
+    assertEquals(b2, b);
+
+    assertThat(a, Matchers.not(Matchers.equalTo(Paths.get("/tmp/foo"))));
+    assertTrue(a != null);
+  }
+
+  @Test(expected = IllegalArgumentException.class)
+  public void testInvalidGcsPath() {
+    @SuppressWarnings("unused")
+    GcsPath filename =
+        GcsPath.fromUri("file://invalid/gcs/path");
+  }
+
+  @Test(expected = IllegalArgumentException.class)
+  public void testInvalidBucket() {
+    GcsPath.fromComponents("invalid/", "");
+  }
+
+  @Test(expected = IllegalArgumentException.class)
+  public void testInvalidObject_newline() {
+    GcsPath.fromComponents(null, "a\nb");
+  }
+
+  @Test(expected = IllegalArgumentException.class)
+  public void testInvalidObject_cr() {
+    GcsPath.fromComponents(null, "a\rb");
+  }
+
+  @Test
+  public void testResolveUri() {
+    GcsPath path = GcsPath.fromComponents("bucket", "a/b/c");
+    GcsPath d = path.resolve("gs://bucket2/d");
+    assertEquals("gs://bucket2/d", d.toString());
+  }
+
+  @Test
+  public void testResolveOther() {
+    GcsPath a = GcsPath.fromComponents("bucket", "a");
+    GcsPath b = a.resolve(Paths.get("b"));
+    assertEquals("a/b", b.getObject());
+  }
+
+  @Test
+  public void testGetFileName() {
+    assertEquals("foo", GcsPath.fromUri("gs://bucket/bar/foo").getFileName().toString());
+    assertEquals("foo", GcsPath.fromUri("gs://bucket/foo").getFileName().toString());
+    thrown.expect(UnsupportedOperationException.class);
+    GcsPath.fromUri("gs://bucket/").getFileName();
+  }
+
+  @Test
+  public void testResolveSibling() {
+    assertEquals(
+        "gs://bucket/bar/moo",
+        GcsPath.fromUri("gs://bucket/bar/foo").resolveSibling("moo").toString());
+    assertEquals(
+        "gs://bucket/moo",
+        GcsPath.fromUri("gs://bucket/foo").resolveSibling("moo").toString());
+    thrown.expect(UnsupportedOperationException.class);
+    GcsPath.fromUri("gs://bucket/").resolveSibling("moo");
+  }
+
+  @Test
+  public void testCompareTo() {
+    GcsPath a = GcsPath.fromComponents("bucket", "a");
+    GcsPath b = GcsPath.fromComponents("bucket", "b");
+    GcsPath b2 = GcsPath.fromComponents("bucket2", "b");
+    GcsPath brel = GcsPath.fromComponents(null, "b");
+    GcsPath a2 = GcsPath.fromComponents("bucket", "a");
+    GcsPath arel = GcsPath.fromComponents(null, "a");
+
+    assertThat(a.compareTo(b), Matchers.lessThan(0));
+    assertThat(b.compareTo(a), Matchers.greaterThan(0));
+    assertThat(a.compareTo(a2), Matchers.equalTo(0));
+
+    assertThat(a.hashCode(), Matchers.equalTo(a2.hashCode()));
+    assertThat(a.hashCode(), Matchers.not(Matchers.equalTo(b.hashCode())));
+    assertThat(b.hashCode(), Matchers.not(Matchers.equalTo(brel.hashCode())));
+
+    assertThat(brel.compareTo(b), Matchers.lessThan(0));
+    assertThat(b.compareTo(brel), Matchers.greaterThan(0));
+    assertThat(arel.compareTo(brel), Matchers.lessThan(0));
+    assertThat(brel.compareTo(arel), Matchers.greaterThan(0));
+
+    assertThat(b.compareTo(b2), Matchers.lessThan(0));
+    assertThat(b2.compareTo(b), Matchers.greaterThan(0));
+  }
+
+  @Test
+  public void testCompareTo_ordering() {
+    GcsPath ab = GcsPath.fromComponents("bucket", "a/b");
+    GcsPath abc = GcsPath.fromComponents("bucket", "a/b/c");
+    GcsPath a1b = GcsPath.fromComponents("bucket", "a-1/b");
+
+    assertThat(ab.compareTo(a1b), Matchers.lessThan(0));
+    assertThat(a1b.compareTo(ab), Matchers.greaterThan(0));
+
+    assertThat(ab.compareTo(abc), Matchers.lessThan(0));
+    assertThat(abc.compareTo(ab), Matchers.greaterThan(0));
+  }
+
+  @Test
+  public void testCompareTo_buckets() {
+    GcsPath a = GcsPath.fromComponents(null, "a/b/c");
+    GcsPath b = GcsPath.fromComponents("bucket", "a/b/c");
+
+    assertThat(a.compareTo(b), Matchers.lessThan(0));
+    assertThat(b.compareTo(a), Matchers.greaterThan(0));
+  }
+
+  @Test
+  public void testIterator() {
+    GcsPath a = GcsPath.fromComponents("bucket", "a/b/c");
+    Iterator<Path> it = a.iterator();
+
+    assertTrue(it.hasNext());
+    assertEquals("gs://bucket/", it.next().toString());
+    assertTrue(it.hasNext());
+    assertEquals("a", it.next().toString());
+    assertTrue(it.hasNext());
+    assertEquals("b", it.next().toString());
+    assertTrue(it.hasNext());
+    assertEquals("c", it.next().toString());
+    assertFalse(it.hasNext());
+  }
+
+  @Test
+  public void testSubpath() {
+    GcsPath a = GcsPath.fromComponents("bucket", "a/b/c/d");
+    assertThat(a.subpath(0, 1).toString(), Matchers.equalTo("gs://bucket/"));
+    assertThat(a.subpath(0, 2).toString(), Matchers.equalTo("gs://bucket/a"));
+    assertThat(a.subpath(0, 3).toString(), Matchers.equalTo("gs://bucket/a/b"));
+    assertThat(a.subpath(0, 4).toString(), Matchers.equalTo("gs://bucket/a/b/c"));
+    assertThat(a.subpath(1, 2).toString(), Matchers.equalTo("a"));
+    assertThat(a.subpath(2, 3).toString(), Matchers.equalTo("b"));
+    assertThat(a.subpath(2, 4).toString(), Matchers.equalTo("b/c"));
+    assertThat(a.subpath(2, 5).toString(), Matchers.equalTo("b/c/d"));
+  }
+
+  @Test
+  public void testGetName() {
+    GcsPath a = GcsPath.fromComponents("bucket", "a/b/c/d");
+    assertEquals(5, a.getNameCount());
+    assertThat(a.getName(0).toString(), Matchers.equalTo("gs://bucket/"));
+    assertThat(a.getName(1).toString(), Matchers.equalTo("a"));
+    assertThat(a.getName(2).toString(), Matchers.equalTo("b"));
+    assertThat(a.getName(3).toString(), Matchers.equalTo("c"));
+    assertThat(a.getName(4).toString(), Matchers.equalTo("d"));
+  }
+
+  @Test(expected = IllegalArgumentException.class)
+  public void testSubPathError() {
+    GcsPath a = GcsPath.fromComponents("bucket", "a/b/c/d");
+    a.subpath(1, 1); // throws IllegalArgumentException
+    Assert.fail();
+  }
+}


[2/2] beam git commit: [BEAM-1871] Move over GcsPath to gcp-core

Posted by lc...@apache.org.
[BEAM-1871] Move over GcsPath to gcp-core

This closes #2616


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/33078d20
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/33078d20
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/33078d20

Branch: refs/heads/master
Commit: 33078d20c45780fd276a03cfeb809292ff29aed9
Parents: 36a12d0 1a3f350
Author: Luke Cwik <lc...@google.com>
Authored: Thu Apr 20 14:23:08 2017 -0700
Committer: Luke Cwik <lc...@google.com>
Committed: Thu Apr 20 14:23:08 2017 -0700

----------------------------------------------------------------------
 sdks/java/core/pom.xml                          |   5 -
 .../org/apache/beam/sdk/util/gcsfs/GcsPath.java | 626 -------------------
 .../beam/sdk/util/gcsfs/package-info.java       |  20 -
 .../org/apache/beam/SdkCoreApiSurfaceTest.java  |   1 -
 .../apache/beam/sdk/util/gcsfs/GcsPathTest.java | 358 -----------
 sdks/java/extensions/gcp-core/pom.xml           |   5 -
 .../org/apache/beam/sdk/util/gcsfs/GcsPath.java | 626 +++++++++++++++++++
 .../beam/sdk/util/gcsfs/package-info.java       |  20 +
 .../apache/beam/sdk/util/gcsfs/GcsPathTest.java | 358 +++++++++++
 9 files changed, 1004 insertions(+), 1015 deletions(-)
----------------------------------------------------------------------