You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@beam.apache.org by jb...@apache.org on 2017/03/20 14:16:02 UTC
[1/2] beam git commit: [BEAM-1569] Add support file patterns containing spaces in HdfsIO
Repository: beam
Updated Branches:
refs/heads/master 53d7682d0 -> 6b8bfc75a
[BEAM-1569] Add support file patterns containing spaces in HdfsIO
Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/63c4fdd9
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/63c4fdd9
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/63c4fdd9
Branch: refs/heads/master
Commit: 63c4fdd9d7f438f1e395921b6cc5f549825c04b4
Parents: 53d7682
Author: Michael Luckey <mi...@ext.gfk.com>
Authored: Wed Mar 1 01:13:11 2017 +0100
Committer: Jean-Baptiste Onofré <jb...@apache.org>
Committed: Mon Mar 20 14:58:29 2017 +0100
----------------------------------------------------------------------
.../apache/beam/sdk/io/hdfs/HDFSFileSource.java | 6 +++---
.../beam/sdk/io/hdfs/HDFSFileSourceTest.java | 19 +++++++++++++++++--
2 files changed, 20 insertions(+), 5 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/beam/blob/63c4fdd9/sdks/java/io/hdfs/src/main/java/org/apache/beam/sdk/io/hdfs/HDFSFileSource.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/hdfs/src/main/java/org/apache/beam/sdk/io/hdfs/HDFSFileSource.java b/sdks/java/io/hdfs/src/main/java/org/apache/beam/sdk/io/hdfs/HDFSFileSource.java
index 0e3146f..357a527 100644
--- a/sdks/java/io/hdfs/src/main/java/org/apache/beam/sdk/io/hdfs/HDFSFileSource.java
+++ b/sdks/java/io/hdfs/src/main/java/org/apache/beam/sdk/io/hdfs/HDFSFileSource.java
@@ -30,7 +30,6 @@ import java.io.ObjectInput;
import java.io.ObjectOutput;
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
-import java.net.URI;
import java.security.PrivilegedExceptionAction;
import java.util.List;
import java.util.ListIterator;
@@ -338,9 +337,10 @@ public abstract class HDFSFileSource<T, K, V> extends BoundedSource<T> {
UGIHelper.getBestUGI(username()).doAs(new PrivilegedExceptionAction<Void>() {
@Override
public Void run() throws Exception {
- FileSystem fs = FileSystem.get(new URI(filepattern()),
+ final Path pathPattern = new Path(filepattern());
+ FileSystem fs = FileSystem.get(pathPattern.toUri(),
SerializableConfiguration.newConfiguration(serializableConfiguration()));
- FileStatus[] fileStatuses = fs.globStatus(new Path(filepattern()));
+ FileStatus[] fileStatuses = fs.globStatus(pathPattern);
checkState(
fileStatuses != null && fileStatuses.length > 0,
"Unable to find any files matching %s", filepattern());
http://git-wip-us.apache.org/repos/asf/beam/blob/63c4fdd9/sdks/java/io/hdfs/src/test/java/org/apache/beam/sdk/io/hdfs/HDFSFileSourceTest.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/hdfs/src/test/java/org/apache/beam/sdk/io/hdfs/HDFSFileSourceTest.java b/sdks/java/io/hdfs/src/test/java/org/apache/beam/sdk/io/hdfs/HDFSFileSourceTest.java
index ac6af40..c821d9d 100644
--- a/sdks/java/io/hdfs/src/test/java/org/apache/beam/sdk/io/hdfs/HDFSFileSourceTest.java
+++ b/sdks/java/io/hdfs/src/test/java/org/apache/beam/sdk/io/hdfs/HDFSFileSourceTest.java
@@ -63,8 +63,23 @@ public class HDFSFileSourceTest {
File file = createFileWithData("tmp.seq", expectedResults);
HDFSFileSource<KV<IntWritable, Text>, IntWritable, Text> source =
- HDFSFileSource.from(
- file.toString(), SequenceFileInputFormat.class, IntWritable.class, Text.class);
+ HDFSFileSource.from(
+ file.toString(), SequenceFileInputFormat.class, IntWritable.class, Text.class);
+
+ assertEquals(file.length(), source.getEstimatedSizeBytes(null));
+
+ assertThat(expectedResults, containsInAnyOrder(readFromSource(source, options).toArray()));
+ }
+
+ @Test
+ public void testFullyReadSingleFileWithSpaces() throws Exception {
+ PipelineOptions options = PipelineOptionsFactory.create();
+ List<KV<IntWritable, Text>> expectedResults = createRandomRecords(3, 10, 0);
+ File file = createFileWithData("tmp data.seq", expectedResults);
+
+ HDFSFileSource<KV<IntWritable, Text>, IntWritable, Text> source =
+ HDFSFileSource.from(
+ file.toString(), SequenceFileInputFormat.class, IntWritable.class, Text.class);
assertEquals(file.length(), source.getEstimatedSizeBytes(null));
[2/2] beam git commit: [BEAM-1569] This closes #2132
Posted by jb...@apache.org.
[BEAM-1569] This closes #2132
Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/6b8bfc75
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/6b8bfc75
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/6b8bfc75
Branch: refs/heads/master
Commit: 6b8bfc75af95e486a16eebfbe817a25b0b27bd34
Parents: 53d7682 63c4fdd
Author: Jean-Baptiste Onofré <jb...@apache.org>
Authored: Mon Mar 20 15:15:56 2017 +0100
Committer: Jean-Baptiste Onofré <jb...@apache.org>
Committed: Mon Mar 20 15:15:56 2017 +0100
----------------------------------------------------------------------
.../apache/beam/sdk/io/hdfs/HDFSFileSource.java | 6 +++---
.../beam/sdk/io/hdfs/HDFSFileSourceTest.java | 19 +++++++++++++++++--
2 files changed, 20 insertions(+), 5 deletions(-)
----------------------------------------------------------------------