You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@samza.apache.org by xi...@apache.org on 2018/03/13 19:28:19 UTC
samza git commit: SAMZA-1618: fix HdfsFileSystemAdapter to get files recursively
Repository: samza
Updated Branches:
refs/heads/master 2be7061d4 -> 49e5073c3
SAMZA-1618: fix HdfsFileSystemAdapter to get files recursively
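Fix HdfsFileSystemAdapter to get files recursively: when a listed entry is a directory, getAllFiles now descends into it and collects the files it contains instead of adding the directory itself as a file.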
Author: Hai Lu <ha...@linkedin.com>
Reviewers: Xinyu Liu <xi...@gmail.com>
Closes #447 from lhaiesp/master
Project: http://git-wip-us.apache.org/repos/asf/samza/repo
Commit: http://git-wip-us.apache.org/repos/asf/samza/commit/49e5073c
Tree: http://git-wip-us.apache.org/repos/asf/samza/tree/49e5073c
Diff: http://git-wip-us.apache.org/repos/asf/samza/diff/49e5073c
Branch: refs/heads/master
Commit: 49e5073c318abcdf64537d403b80dec218274244
Parents: 2be7061
Author: Hai Lu <ha...@linkedin.com>
Authored: Tue Mar 13 12:28:11 2018 -0700
Committer: xiliu <xi...@linkedin.com>
Committed: Tue Mar 13 12:28:11 2018 -0700
----------------------------------------------------------------------
.../hdfs/partitioner/HdfsFileSystemAdapter.java | 7 +++++--
.../hdfs/partitioner/TestHdfsFileSystemAdapter.java | 2 +-
.../resources/partitioner/subfolder/testfile002 | 16 ++++++++++++++++
3 files changed, 22 insertions(+), 3 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/samza/blob/49e5073c/samza-hdfs/src/main/java/org/apache/samza/system/hdfs/partitioner/HdfsFileSystemAdapter.java
----------------------------------------------------------------------
diff --git a/samza-hdfs/src/main/java/org/apache/samza/system/hdfs/partitioner/HdfsFileSystemAdapter.java b/samza-hdfs/src/main/java/org/apache/samza/system/hdfs/partitioner/HdfsFileSystemAdapter.java
index bb7b3fa..07caaf7 100644
--- a/samza-hdfs/src/main/java/org/apache/samza/system/hdfs/partitioner/HdfsFileSystemAdapter.java
+++ b/samza-hdfs/src/main/java/org/apache/samza/system/hdfs/partitioner/HdfsFileSystemAdapter.java
@@ -28,7 +28,6 @@ import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.samza.SamzaException;
-import org.apache.samza.config.Config;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -44,7 +43,11 @@ public class HdfsFileSystemAdapter implements FileSystemAdapter {
FileSystem fileSystem = streamPath.getFileSystem(new Configuration());
FileStatus[] fileStatuses = fileSystem.listStatus(streamPath);
for (FileStatus fileStatus : fileStatuses) {
- ret.add(new FileMetadata(fileStatus.getPath().toString(), fileStatus.getLen()));
+ if (!fileStatus.isDirectory()) {
+ ret.add(new FileMetadata(fileStatus.getPath().toString(), fileStatus.getLen()));
+ } else {
+ ret.addAll(getAllFiles(fileStatus.getPath().toString()));
+ }
}
} catch (IOException e) {
LOG.error("Failed to get the list of files for " + streamName, e);
http://git-wip-us.apache.org/repos/asf/samza/blob/49e5073c/samza-hdfs/src/test/java/org/apache/samza/system/hdfs/partitioner/TestHdfsFileSystemAdapter.java
----------------------------------------------------------------------
diff --git a/samza-hdfs/src/test/java/org/apache/samza/system/hdfs/partitioner/TestHdfsFileSystemAdapter.java b/samza-hdfs/src/test/java/org/apache/samza/system/hdfs/partitioner/TestHdfsFileSystemAdapter.java
index 0fb461f..a20e285 100644
--- a/samza-hdfs/src/test/java/org/apache/samza/system/hdfs/partitioner/TestHdfsFileSystemAdapter.java
+++ b/samza-hdfs/src/test/java/org/apache/samza/system/hdfs/partitioner/TestHdfsFileSystemAdapter.java
@@ -38,7 +38,7 @@ public class TestHdfsFileSystemAdapter {
FileSystemAdapter adapter = new HdfsFileSystemAdapter();
List<FileSystemAdapter.FileMetadata> result =
adapter.getAllFiles(url.getPath());
- Assert.assertEquals(2, result.size());
+ Assert.assertEquals(3, result.size());
}
@Test
http://git-wip-us.apache.org/repos/asf/samza/blob/49e5073c/samza-hdfs/src/test/resources/partitioner/subfolder/testfile002
----------------------------------------------------------------------
diff --git a/samza-hdfs/src/test/resources/partitioner/subfolder/testfile002 b/samza-hdfs/src/test/resources/partitioner/subfolder/testfile002
new file mode 100644
index 0000000..fe3e3b6
--- /dev/null
+++ b/samza-hdfs/src/test/resources/partitioner/subfolder/testfile002
@@ -0,0 +1,16 @@
+censed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.