You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@beam.apache.org by jb...@apache.org on 2017/03/20 14:16:02 UTC

[1/2] beam git commit: [BEAM-1569] Add support file patterns containing spaces in HdfsIO

Repository: beam
Updated Branches:
  refs/heads/master 53d7682d0 -> 6b8bfc75a


[BEAM-1569] Add support file patterns containing spaces in HdfsIO


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/63c4fdd9
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/63c4fdd9
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/63c4fdd9

Branch: refs/heads/master
Commit: 63c4fdd9d7f438f1e395921b6cc5f549825c04b4
Parents: 53d7682
Author: Michael Luckey <mi...@ext.gfk.com>
Authored: Wed Mar 1 01:13:11 2017 +0100
Committer: Jean-Baptiste Onofré <jb...@apache.org>
Committed: Mon Mar 20 14:58:29 2017 +0100

----------------------------------------------------------------------
 .../apache/beam/sdk/io/hdfs/HDFSFileSource.java  |  6 +++---
 .../beam/sdk/io/hdfs/HDFSFileSourceTest.java     | 19 +++++++++++++++++--
 2 files changed, 20 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/beam/blob/63c4fdd9/sdks/java/io/hdfs/src/main/java/org/apache/beam/sdk/io/hdfs/HDFSFileSource.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/hdfs/src/main/java/org/apache/beam/sdk/io/hdfs/HDFSFileSource.java b/sdks/java/io/hdfs/src/main/java/org/apache/beam/sdk/io/hdfs/HDFSFileSource.java
index 0e3146f..357a527 100644
--- a/sdks/java/io/hdfs/src/main/java/org/apache/beam/sdk/io/hdfs/HDFSFileSource.java
+++ b/sdks/java/io/hdfs/src/main/java/org/apache/beam/sdk/io/hdfs/HDFSFileSource.java
@@ -30,7 +30,6 @@ import java.io.ObjectInput;
 import java.io.ObjectOutput;
 import java.lang.reflect.InvocationTargetException;
 import java.lang.reflect.Method;
-import java.net.URI;
 import java.security.PrivilegedExceptionAction;
 import java.util.List;
 import java.util.ListIterator;
@@ -338,9 +337,10 @@ public abstract class HDFSFileSource<T, K, V> extends BoundedSource<T> {
         UGIHelper.getBestUGI(username()).doAs(new PrivilegedExceptionAction<Void>() {
               @Override
               public Void run() throws Exception {
-                FileSystem fs = FileSystem.get(new URI(filepattern()),
+                final Path pathPattern = new Path(filepattern());
+                FileSystem fs = FileSystem.get(pathPattern.toUri(),
                     SerializableConfiguration.newConfiguration(serializableConfiguration()));
-                FileStatus[] fileStatuses = fs.globStatus(new Path(filepattern()));
+                FileStatus[] fileStatuses = fs.globStatus(pathPattern);
                 checkState(
                     fileStatuses != null && fileStatuses.length > 0,
                     "Unable to find any files matching %s", filepattern());

http://git-wip-us.apache.org/repos/asf/beam/blob/63c4fdd9/sdks/java/io/hdfs/src/test/java/org/apache/beam/sdk/io/hdfs/HDFSFileSourceTest.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/hdfs/src/test/java/org/apache/beam/sdk/io/hdfs/HDFSFileSourceTest.java b/sdks/java/io/hdfs/src/test/java/org/apache/beam/sdk/io/hdfs/HDFSFileSourceTest.java
index ac6af40..c821d9d 100644
--- a/sdks/java/io/hdfs/src/test/java/org/apache/beam/sdk/io/hdfs/HDFSFileSourceTest.java
+++ b/sdks/java/io/hdfs/src/test/java/org/apache/beam/sdk/io/hdfs/HDFSFileSourceTest.java
@@ -63,8 +63,23 @@ public class HDFSFileSourceTest {
     File file = createFileWithData("tmp.seq", expectedResults);
 
     HDFSFileSource<KV<IntWritable, Text>, IntWritable, Text> source =
-        HDFSFileSource.from(
-            file.toString(), SequenceFileInputFormat.class, IntWritable.class, Text.class);
+            HDFSFileSource.from(
+                    file.toString(), SequenceFileInputFormat.class, IntWritable.class, Text.class);
+
+    assertEquals(file.length(), source.getEstimatedSizeBytes(null));
+
+    assertThat(expectedResults, containsInAnyOrder(readFromSource(source, options).toArray()));
+  }
+
+  @Test
+  public void testFullyReadSingleFileWithSpaces() throws Exception {
+    PipelineOptions options = PipelineOptionsFactory.create();
+    List<KV<IntWritable, Text>> expectedResults = createRandomRecords(3, 10, 0);
+    File file = createFileWithData("tmp data.seq", expectedResults);
+
+    HDFSFileSource<KV<IntWritable, Text>, IntWritable, Text> source =
+            HDFSFileSource.from(
+                    file.toString(), SequenceFileInputFormat.class, IntWritable.class, Text.class);
 
     assertEquals(file.length(), source.getEstimatedSizeBytes(null));
 


[2/2] beam git commit: [BEAM-1569] This closes #2132

Posted by jb...@apache.org.
[BEAM-1569] This closes #2132


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/6b8bfc75
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/6b8bfc75
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/6b8bfc75

Branch: refs/heads/master
Commit: 6b8bfc75af95e486a16eebfbe817a25b0b27bd34
Parents: 53d7682 63c4fdd
Author: Jean-Baptiste Onofré <jb...@apache.org>
Authored: Mon Mar 20 15:15:56 2017 +0100
Committer: Jean-Baptiste Onofré <jb...@apache.org>
Committed: Mon Mar 20 15:15:56 2017 +0100

----------------------------------------------------------------------
 .../apache/beam/sdk/io/hdfs/HDFSFileSource.java  |  6 +++---
 .../beam/sdk/io/hdfs/HDFSFileSourceTest.java     | 19 +++++++++++++++++--
 2 files changed, 20 insertions(+), 5 deletions(-)
----------------------------------------------------------------------