You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@samza.apache.org by xi...@apache.org on 2018/03/13 19:28:19 UTC

samza git commit: SAMZA-1618: fix HdfsFileSystemAdapter to get files recursively

Repository: samza
Updated Branches:
  refs/heads/master 2be7061d4 -> 49e5073c3


SAMZA-1618: fix HdfsFileSystemAdapter to get files recursively

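Fix HdfsFileSystemAdapter.getAllFiles so that it descends into subdirectories: plain files are added to the result as before, and each directory entry is expanded by a recursive call to getAllFiles.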

Author: Hai Lu <ha...@linkedin.com>

Reviewers: Xinyu Liu <xi...@gmail.com>

Closes #447 from lhaiesp/master


Project: http://git-wip-us.apache.org/repos/asf/samza/repo
Commit: http://git-wip-us.apache.org/repos/asf/samza/commit/49e5073c
Tree: http://git-wip-us.apache.org/repos/asf/samza/tree/49e5073c
Diff: http://git-wip-us.apache.org/repos/asf/samza/diff/49e5073c

Branch: refs/heads/master
Commit: 49e5073c318abcdf64537d403b80dec218274244
Parents: 2be7061
Author: Hai Lu <ha...@linkedin.com>
Authored: Tue Mar 13 12:28:11 2018 -0700
Committer: xiliu <xi...@linkedin.com>
Committed: Tue Mar 13 12:28:11 2018 -0700

----------------------------------------------------------------------
 .../hdfs/partitioner/HdfsFileSystemAdapter.java     |  7 +++++--
 .../hdfs/partitioner/TestHdfsFileSystemAdapter.java |  2 +-
 .../resources/partitioner/subfolder/testfile002     | 16 ++++++++++++++++
 3 files changed, 22 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/samza/blob/49e5073c/samza-hdfs/src/main/java/org/apache/samza/system/hdfs/partitioner/HdfsFileSystemAdapter.java
----------------------------------------------------------------------
diff --git a/samza-hdfs/src/main/java/org/apache/samza/system/hdfs/partitioner/HdfsFileSystemAdapter.java b/samza-hdfs/src/main/java/org/apache/samza/system/hdfs/partitioner/HdfsFileSystemAdapter.java
index bb7b3fa..07caaf7 100644
--- a/samza-hdfs/src/main/java/org/apache/samza/system/hdfs/partitioner/HdfsFileSystemAdapter.java
+++ b/samza-hdfs/src/main/java/org/apache/samza/system/hdfs/partitioner/HdfsFileSystemAdapter.java
@@ -28,7 +28,6 @@ import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.samza.SamzaException;
-import org.apache.samza.config.Config;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -44,7 +43,11 @@ public class HdfsFileSystemAdapter implements FileSystemAdapter {
       FileSystem fileSystem = streamPath.getFileSystem(new Configuration());
       FileStatus[] fileStatuses = fileSystem.listStatus(streamPath);
       for (FileStatus fileStatus : fileStatuses) {
-        ret.add(new FileMetadata(fileStatus.getPath().toString(), fileStatus.getLen()));
+        if (!fileStatus.isDirectory()) {
+          ret.add(new FileMetadata(fileStatus.getPath().toString(), fileStatus.getLen()));
+        } else {
+          ret.addAll(getAllFiles(fileStatus.getPath().toString()));
+        }
       }
     } catch (IOException e) {
       LOG.error("Failed to get the list of files for " + streamName, e);

http://git-wip-us.apache.org/repos/asf/samza/blob/49e5073c/samza-hdfs/src/test/java/org/apache/samza/system/hdfs/partitioner/TestHdfsFileSystemAdapter.java
----------------------------------------------------------------------
diff --git a/samza-hdfs/src/test/java/org/apache/samza/system/hdfs/partitioner/TestHdfsFileSystemAdapter.java b/samza-hdfs/src/test/java/org/apache/samza/system/hdfs/partitioner/TestHdfsFileSystemAdapter.java
index 0fb461f..a20e285 100644
--- a/samza-hdfs/src/test/java/org/apache/samza/system/hdfs/partitioner/TestHdfsFileSystemAdapter.java
+++ b/samza-hdfs/src/test/java/org/apache/samza/system/hdfs/partitioner/TestHdfsFileSystemAdapter.java
@@ -38,7 +38,7 @@ public class TestHdfsFileSystemAdapter {
     FileSystemAdapter adapter = new HdfsFileSystemAdapter();
     List<FileSystemAdapter.FileMetadata> result =
       adapter.getAllFiles(url.getPath());
-    Assert.assertEquals(2, result.size());
+    Assert.assertEquals(3, result.size());
   }
 
   @Test

http://git-wip-us.apache.org/repos/asf/samza/blob/49e5073c/samza-hdfs/src/test/resources/partitioner/subfolder/testfile002
----------------------------------------------------------------------
diff --git a/samza-hdfs/src/test/resources/partitioner/subfolder/testfile002 b/samza-hdfs/src/test/resources/partitioner/subfolder/testfile002
new file mode 100644
index 0000000..fe3e3b6
--- /dev/null
+++ b/samza-hdfs/src/test/resources/partitioner/subfolder/testfile002
@@ -0,0 +1,16 @@
+censed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.