You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by ga...@apache.org on 2022/03/18 16:11:31 UTC

[hadoop] branch trunk updated: HDFS-16470. Make HDFS find tool cross platform (#4076)

This is an automated email from the ASF dual-hosted git repository.

gaurava pushed a commit to branch trunk
in repository https://gitbox.apache.org/repos/asf/hadoop.git


The following commit(s) were added to refs/heads/trunk by this push:
     new a631f45  HDFS-16470. Make HDFS find tool cross platform (#4076)
a631f45 is described below

commit a631f45a99c7abf8c9a2dcfb10afb668c8ff6b09
Author: Gautham B A <ga...@gmail.com>
AuthorDate: Fri Mar 18 21:41:01 2022 +0530

    HDFS-16470. Make HDFS find tool cross platform (#4076)
    
    * The source files for hdfs_find use
       getopt for parsing the command-line
       arguments. getopt is available
       only on Linux and thus isn't
       cross-platform.
    * Thus, we need to replace getopt
       with boost::program_options to
       make hdfs_find cross-platform.
---
 .../native/libhdfspp/tests/tools/CMakeLists.txt    |   3 +
 .../tests/tools/hdfs-create-snapshot-mock.cc       |   8 +-
 .../native/libhdfspp/tests/tools/hdfs-find-mock.cc |  93 ++++++++++
 .../native/libhdfspp/tests/tools/hdfs-find-mock.h  |  69 ++++++++
 .../libhdfspp/tests/tools/hdfs-tool-tests.cc       |  20 +++
 .../native/libhdfspp/tests/tools/hdfs-tool-tests.h |  32 +++-
 .../src/main/native/libhdfspp/tools/CMakeLists.txt |   3 +-
 .../libhdfspp/tools/hdfs-find/CMakeLists.txt       |  27 +++
 .../native/libhdfspp/tools/hdfs-find/hdfs-find.cc  | 193 +++++++++++++++++++++
 .../native/libhdfspp/tools/hdfs-find/hdfs-find.h   |  96 ++++++++++
 .../main/native/libhdfspp/tools/hdfs-find/main.cc  |  52 ++++++
 .../src/main/native/libhdfspp/tools/hdfs_find.cc   | 146 ----------------
 12 files changed, 590 insertions(+), 152 deletions(-)

diff --git a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tests/tools/CMakeLists.txt b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tests/tools/CMakeLists.txt
index 769e5da..22d677f 100644
--- a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tests/tools/CMakeLists.txt
+++ b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tests/tools/CMakeLists.txt
@@ -36,6 +36,7 @@ add_executable(hdfs_tool_tests
                 hdfs-mkdir-mock.cc
                 hdfs-rm-mock.cc
                 hdfs-get-mock.cc
+                hdfs-find-mock.cc
                 main.cc)
 target_include_directories(hdfs_tool_tests PRIVATE
                             ../tools
@@ -56,6 +57,7 @@ target_include_directories(hdfs_tool_tests PRIVATE
                             ../../tools/hdfs-mkdir
                             ../../tools/hdfs-rm
                             ../../tools/hdfs-get
+                            ../../tools/hdfs-find
                             ../../tools/hdfs-cat)
 target_link_libraries(hdfs_tool_tests PRIVATE
                        gmock_main
@@ -75,5 +77,6 @@ target_link_libraries(hdfs_tool_tests PRIVATE
                        hdfs_mkdir_lib
                        hdfs_rm_lib
                        hdfs_get_lib
+                       hdfs_find_lib
                        hdfs_cat_lib)
 add_test(hdfs_tool_tests hdfs_tool_tests)
diff --git a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tests/tools/hdfs-create-snapshot-mock.cc b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tests/tools/hdfs-create-snapshot-mock.cc
index 3239631..6a4f96a 100644
--- a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tests/tools/hdfs-create-snapshot-mock.cc
+++ b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tests/tools/hdfs-create-snapshot-mock.cc
@@ -48,9 +48,11 @@ void CreateSnapshotMock::SetExpectations(
   }
 
   if (*test_case_func == &PassNOptAndAPath<CreateSnapshotMock>) {
-    const auto arg1 = args[1];
-    const auto arg2 = std::optional{args[0]};
-    EXPECT_CALL(*this, HandleSnapshot(arg1, arg2))
+    const auto opt_n = args[0];
+    const auto path = args[2];
+    const auto opt_n_value = std::optional{args[1]};
+    ASSERT_EQ(opt_n, "-n");
+    EXPECT_CALL(*this, HandleSnapshot(path, opt_n_value))
         .Times(1)
         .WillOnce(testing::Return(true));
   }
diff --git a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tests/tools/hdfs-find-mock.cc b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tests/tools/hdfs-find-mock.cc
new file mode 100644
index 0000000..9fd57ec
--- /dev/null
+++ b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tests/tools/hdfs-find-mock.cc
@@ -0,0 +1,93 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <functional>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include <gmock/gmock.h>
+#include <gtest/gtest.h>
+
+#include "hdfs-find-mock.h"
+#include "hdfs-tool-tests.h"
+#include "hdfspp/hdfspp.h"
+
+namespace hdfs::tools::test {
+FindMock::~FindMock() = default;
+
+void FindMock::SetExpectations(
+    std::function<std::unique_ptr<FindMock>()> test_case,
+    const std::vector<std::string> &args) const {
+  // Recover the raw function pointer held by the std::function wrapper
+  using TestCaseFn = std::unique_ptr<FindMock> (*)();
+  const auto selected_case = test_case.target<TestCaseFn>();
+  ASSERT_NE(selected_case, nullptr);
+
+  // Register the call each test case is expected to trigger, together with
+  // the arguments that call must receive
+  if (*selected_case == &CallHelp<FindMock>) {
+    EXPECT_CALL(*this, HandleHelp()).Times(1).WillOnce(testing::Return(true));
+    return;
+  }
+
+  if (*selected_case == &PassAPath<FindMock>) {
+    const auto &path = args[0];
+    const auto max_depth = hdfs::FileSystem::GetDefaultFindMaxDepth();
+    EXPECT_CALL(*this, HandlePath(path, "*", max_depth))
+        .Times(1)
+        .WillOnce(testing::Return(true));
+  }
+
+  if (*selected_case == &PassNOptAndAPath<FindMock>) {
+    const auto &opt_n = args[0];
+    const auto &name = args[1];
+    const auto &path = args[2];
+    ASSERT_EQ(opt_n, "-n");
+    const auto max_depth = hdfs::FileSystem::GetDefaultFindMaxDepth();
+    EXPECT_CALL(*this, HandlePath(path, name, max_depth))
+        .Times(1)
+        .WillOnce(testing::Return(true));
+  }
+
+  if (*selected_case == &PassMOptPermissionsAndAPath<FindMock>) {
+    const auto &opt_m = args[0];
+    const auto &path = args[2];
+    ASSERT_EQ(opt_m, "-m");
+    // args[1] carries the textual value supplied for -m
+    const auto max_depth = static_cast<uint32_t>(std::stoi(args[1]));
+    EXPECT_CALL(*this, HandlePath(path, "*", max_depth))
+        .Times(1)
+        .WillOnce(testing::Return(true));
+  }
+
+  if (*selected_case == &PassNStrMNumAndAPath<FindMock>) {
+    const auto &opt_n = args[0];
+    const auto &name = args[1];
+    const auto &opt_m = args[2];
+    const auto &path = args[4];
+    ASSERT_EQ(opt_n, "-n");
+    ASSERT_EQ(opt_m, "-m");
+    // args[3] carries the textual value supplied for -m
+    const auto max_depth = static_cast<uint32_t>(std::stoi(args[3]));
+    EXPECT_CALL(*this, HandlePath(path, name, max_depth))
+        .Times(1)
+        .WillOnce(testing::Return(true));
+  }
+}
+} // namespace hdfs::tools::test
diff --git a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tests/tools/hdfs-find-mock.h b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tests/tools/hdfs-find-mock.h
new file mode 100644
index 0000000..7520ac7
--- /dev/null
+++ b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tests/tools/hdfs-find-mock.h
@@ -0,0 +1,69 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LIBHDFSPP_TOOLS_HDFS_FIND_MOCK
+#define LIBHDFSPP_TOOLS_HDFS_FIND_MOCK
+
+#include <functional>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include <gmock/gmock.h>
+
+#include "hdfs-find.h"
+
+namespace hdfs::tools::test {
+/**
+ * {@class FindMock} is a {@class Find} that mocks the HandleHelp and
+ * HandlePath methods so that tests can set expectations on their invocation.
+ */
+class FindMock : public hdfs::tools::Find {
+public:
+  /**
+   * {@inheritdoc}
+   */
+  FindMock(const int argc, char **argv) : Find(argc, argv) {}
+
+  // Abiding by the Rule of 5
+  FindMock(const FindMock &) = delete;
+  FindMock(FindMock &&) = delete;
+  FindMock &operator=(const FindMock &) = delete;
+  FindMock &operator=(FindMock &&) = delete;
+  ~FindMock() override;
+
+  /**
+   * Defines the methods and the corresponding arguments that are expected
+   * to be called on this instance of {@link HdfsTool} for the given test case.
+   *
+   * @param test_case An {@link std::function} object that points to the
+   * function defining the test case
+   * @param args The arguments that are passed to this test case
+   */
+  void SetExpectations(std::function<std::unique_ptr<FindMock>()> test_case,
+                       const std::vector<std::string> &args = {}) const;
+
+  MOCK_METHOD(bool, HandleHelp, (), (const, override));
+
+  MOCK_METHOD(bool, HandlePath,
+              (const std::string &, const std::string &, uint32_t),
+              (const, override));
+};
+} // namespace hdfs::tools::test
+
+#endif
diff --git a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tests/tools/hdfs-tool-tests.cc b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tests/tools/hdfs-tool-tests.cc
index 50d555a..53df820 100644
--- a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tests/tools/hdfs-tool-tests.cc
+++ b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tests/tools/hdfs-tool-tests.cc
@@ -31,6 +31,7 @@
 #include "hdfs-df-mock.h"
 #include "hdfs-disallow-snapshot-mock.h"
 #include "hdfs-du-mock.h"
+#include "hdfs-find-mock.h"
 #include "hdfs-get-mock.h"
 #include "hdfs-mkdir-mock.h"
 #include "hdfs-move-to-local-mock.h"
@@ -140,6 +141,14 @@ INSTANTIATE_TEST_SUITE_P(
                     PassAPath<hdfs::tools::test::RmMock>,
                     PassRecursivePath<hdfs::tools::test::RmMock>));
 
+INSTANTIATE_TEST_SUITE_P(
+    HdfsFind, HdfsToolBasicTest,
+    testing::Values(CallHelp<hdfs::tools::test::FindMock>,
+                    PassAPath<hdfs::tools::test::FindMock>,
+                    PassNStrMNumAndAPath<hdfs::tools::test::FindMock>,
+                    PassMOptPermissionsAndAPath<hdfs::tools::test::FindMock>,
+                    PassNOptAndAPath<hdfs::tools::test::FindMock>));
+
 // Negative tests
 INSTANTIATE_TEST_SUITE_P(
     HdfsAllowSnapshot, HdfsToolNegativeTestThrows,
@@ -211,6 +220,17 @@ INSTANTIATE_TEST_SUITE_P(
                     PassMOpt<hdfs::tools::test::RmMock>));
 
 INSTANTIATE_TEST_SUITE_P(
+    HdfsFind, HdfsToolNegativeTestThrows,
+    testing::Values(Pass2Paths<hdfs::tools::test::FindMock>,
+                    Pass3Paths<hdfs::tools::test::FindMock>,
+                    PassRecursiveOwnerAndAPath<hdfs::tools::test::FindMock>,
+                    PassRecursive<hdfs::tools::test::FindMock>,
+                    PassRecursivePath<hdfs::tools::test::FindMock>,
+                    PassMPOptsPermissionsAndAPath<hdfs::tools::test::FindMock>,
+                    PassMOpt<hdfs::tools::test::FindMock>,
+                    PassNOpt<hdfs::tools::test::FindMock>));
+
+INSTANTIATE_TEST_SUITE_P(
     HdfsRm, HdfsToolNegativeTestNoThrow,
     testing::Values(PassRecursive<hdfs::tools::test::RmMock>));
 
diff --git a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tests/tools/hdfs-tool-tests.h b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tests/tools/hdfs-tool-tests.h
index 12dbc6c..f27a2b0 100644
--- a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tests/tools/hdfs-tool-tests.h
+++ b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tests/tools/hdfs-tool-tests.h
@@ -118,7 +118,7 @@ template <class T> std::unique_ptr<T> PassNOptAndAPath() {
   static char *argv[] = {exe.data(), arg1.data(), arg2.data(), arg3.data()};
 
   auto hdfs_tool = std::make_unique<T>(argc, argv);
-  hdfs_tool->SetExpectations(PassNOptAndAPath<T>, {arg2, arg3});
+  hdfs_tool->SetExpectations(PassNOptAndAPath<T>, {arg1, arg2, arg3});
   return hdfs_tool;
 }
 
@@ -271,4 +271,34 @@ template <class T> std::unique_ptr<T> PassMPOptsPermissionsAndAPath() {
   return hdfs_tool;
 }
 
+template <class T> std::unique_ptr<T> PassNStrMNumAndAPath() {
+  constexpr auto argc = 6;
+  static std::string exe("hdfs_tool_name");
+  static std::string opt_n("-n");
+  static std::string name("some_str");
+  static std::string opt_m("-m");
+  static std::string depth("757");
+  static std::string path("some/path");
+
+  static char *argv[] = {exe.data(),   opt_n.data(), name.data(),
+                         opt_m.data(), depth.data(), path.data()};
+
+  auto tool = std::make_unique<T>(argc, argv);
+  tool->SetExpectations(PassNStrMNumAndAPath<T>,
+                        {opt_n, name, opt_m, depth, path});
+  return tool;
+}
+
+template <class T> std::unique_ptr<T> PassNOpt() {
+  constexpr auto argc = 2;
+  static std::string exe("hdfs_tool_name");
+  static std::string opt_n("-n");
+
+  static char *argv[] = {exe.data(), opt_n.data()};
+
+  auto tool = std::make_unique<T>(argc, argv);
+  tool->SetExpectations(PassNOpt<T>, {opt_n});
+  return tool;
+}
+
 #endif
diff --git a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tools/CMakeLists.txt b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tools/CMakeLists.txt
index bed78b7..214d7b5 100644
--- a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tools/CMakeLists.txt
+++ b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tools/CMakeLists.txt
@@ -41,8 +41,7 @@ add_subdirectory(hdfs-chown)
 
 add_subdirectory(hdfs-chmod)
 
-add_executable(hdfs_find hdfs_find.cc)
-target_link_libraries(hdfs_find tools_common hdfspp_static)
+add_subdirectory(hdfs-find)
 
 add_subdirectory(hdfs-mkdir)
 
diff --git a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tools/hdfs-find/CMakeLists.txt b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tools/hdfs-find/CMakeLists.txt
new file mode 100644
index 0000000..c6ce021
--- /dev/null
+++ b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tools/hdfs-find/CMakeLists.txt
@@ -0,0 +1,27 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+add_library(hdfs_find_lib STATIC $<TARGET_OBJECTS:hdfs_tool_obj> hdfs-find.cc)
+target_include_directories(hdfs_find_lib PRIVATE ../../tools ${Boost_INCLUDE_DIRS})
+target_link_libraries(hdfs_find_lib PRIVATE Boost::boost Boost::program_options tools_common hdfspp_static)
+
+add_executable(hdfs_find main.cc)
+target_include_directories(hdfs_find PRIVATE ../../tools)
+target_link_libraries(hdfs_find PRIVATE hdfs_find_lib)
+
+install(TARGETS hdfs_find RUNTIME DESTINATION bin)
diff --git a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tools/hdfs-find/hdfs-find.cc b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tools/hdfs-find/hdfs-find.cc
new file mode 100644
index 0000000..a149d05
--- /dev/null
+++ b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tools/hdfs-find/hdfs-find.cc
@@ -0,0 +1,193 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <future>
+#include <iostream>
+#include <memory>
+#include <ostream>
+#include <sstream>
+#include <string>
+
+#include "hdfs-find.h"
+#include "tools_common.h"
+
+namespace hdfs::tools {
+Find::Find(const int argc, char **argv) : HdfsTool(argc, argv) {}
+
+bool Find::Initialize() {
+  auto add_options = opt_desc_.add_options();
+  add_options(
+      "help,h",
+      "Finds all files recursively starting from the specified PATH and prints "
+      "their file paths. This hdfs_find tool mimics the POSIX find.");
+  add_options(
+      "name,n", po::value<std::string>(),
+      "If provided, all results will be matching the NAME pattern; otherwise, "
+      "the implicit '*' will be used. NAME allows wild-cards.");
+  add_options(
+      "max-depth,m", po::value<uint32_t>(), // must match as<uint32_t>() in Do()
+      "If provided, the maximum depth to recurse after the end of the path is "
+      "reached will be limited by MAX_DEPTH; otherwise, the maximum depth to "
+      "recurse is unbound. MAX_DEPTH can be set to 0 for pure globbing and "
+      "ignoring the NAME option (no recursion after the end of the path).");
+  add_options("path", po::value<std::string>(),
+              "The path where we want to start the find operation");
+
+  // We allow only one positional argument to be passed to this tool. An
+  // exception is thrown if multiple arguments are passed.
+  pos_opt_desc_.add("path", 1);
+
+  po::store(po::command_line_parser(argc_, argv_)
+                .options(opt_desc_)
+                .positional(pos_opt_desc_)
+                .run(),
+            opt_val_);
+  po::notify(opt_val_);
+  return true;
+}
+
+std::string Find::GetDescription() const {
+  std::stringstream desc; // Collects the multi-line usage text returned below
+  desc << "Usage: hdfs_find [OPTION] PATH" << std::endl
+       << std::endl
+       << "Finds all files recursively starting from the" << std::endl
+       << "specified PATH and prints their file paths." << std::endl
+       << "This hdfs_find tool mimics the POSIX find." << std::endl
+       << std::endl
+       << "Both PATH and NAME can have wild-cards." << std::endl
+       << std::endl
+       << "  -n NAME       if provided all results will be matching the NAME "
+          "pattern"
+       << std::endl
+       << "                otherwise, the implicit '*' will be used"
+       << std::endl
+       << "                NAME allows wild-cards" << std::endl
+       << std::endl
+       << "  -m MAX_DEPTH  if provided the maximum depth to recurse after the "
+          "end of"
+       << std::endl
+       << "                the path is reached will be limited by MAX_DEPTH"
+       << std::endl
+       << "                otherwise, the maximum depth to recurse is unbound"
+       << std::endl
+       << "                MAX_DEPTH can be set to 0 for pure globbing and "
+          "ignoring"
+       << std::endl
+       << "                the NAME option (no recursion after the end of the "
+          "path)"
+       << std::endl
+       << std::endl
+       << "  -h            display this help and exit" << std::endl
+       << std::endl
+       << "Examples:" << std::endl
+       << "hdfs_find hdfs://localhost.localdomain:8020/dir?/tree* -n "
+          "some?file*name"
+       << std::endl
+       << "hdfs_find / -n file_name -m 3" << std::endl;
+  return desc.str();
+}
+
+bool Find::Do() {
+  if (!Initialize()) {
+    std::cerr << "Unable to initialize HDFS find tool" << std::endl;
+    return false;
+  }
+
+  if (!ValidateConstraints()) {
+    std::cout << GetDescription();
+    return false;
+  }
+
+  if (opt_val_.count("help") > 0) {
+    return HandleHelp();
+  }
+
+  if (opt_val_.count("path") == 0) {
+    return false;
+  }
+
+  const auto path = opt_val_["path"].as<std::string>();
+  const auto name =
+      opt_val_.count("name") == 0 ? "*" : opt_val_["name"].as<std::string>();
+  const auto max_depth = opt_val_.count("max-depth") > 0
+                             ? opt_val_["max-depth"].as<uint32_t>()
+                             : hdfs::FileSystem::GetDefaultFindMaxDepth();
+  return HandlePath(path, name, max_depth);
+}
+
+bool Find::HandleHelp() const {
+  std::cout << GetDescription(); // The usage text is written to stdout
+  return true;                   // Printing help always reports success
+}
+
+bool Find::HandlePath(const std::string &path, const std::string &name,
+                      const uint32_t max_depth) const {
+  // Parse the given path into a URI object
+  auto target_uri = hdfs::parse_path_or_exit(path);
+
+  const auto file_system = hdfs::doConnect(target_uri, true);
+  if (!file_system) {
+    std::cerr << "Could not connect the file system." << std::endl;
+    return false;
+  }
+
+  const auto done = std::make_shared<std::promise<void>>();
+  std::future<void> done_future(done->get_future());
+  auto first_error = hdfs::Status::OK();
+
+  /**
+   * Result chunks keep being requested until the listing is complete, at
+   * which point the promise is fulfilled to release the waiting caller.
+   *
+   * Find guarantees that the handler will only be called once at a time,
+   * so we do not need any locking here. It also guarantees that the handler
+   * will be only called once with has_more_results set to false.
+   */
+  auto on_results = [done, &first_error](
+                        const hdfs::Status &status,
+                        const std::vector<hdfs::StatInfo> &stat_info,
+                        const bool has_more_results) -> bool {
+    // Print each entry of the chunk as soon as it arrives
+    for (const auto &entry : stat_info) {
+      std::cout << entry.str() << std::endl;
+    }
+
+    // Only the first failure is kept; later ones never overwrite it
+    if (!status.ok() && first_error.ok()) {
+      first_error = status;
+    }
+
+    if (has_more_results) {
+      return true; // Ask for the next chunk
+    }
+    done->set_value(); // Listing finished: release the waiting caller
+    return false;      // No further results are wanted
+  };
+
+  // Asynchronous call to Find; results are delivered through on_results
+  file_system->Find(target_uri.get_path(), name, max_depth, on_results);
+
+  // Block until the handler has fulfilled the promise
+  done_future.get();
+  if (first_error.ok()) {
+    return true;
+  }
+  std::cerr << "Error: " << first_error.ToString() << std::endl;
+  return false;
+}
+} // namespace hdfs::tools
diff --git a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tools/hdfs-find/hdfs-find.h b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tools/hdfs-find/hdfs-find.h
new file mode 100644
index 0000000..9adde3c
--- /dev/null
+++ b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tools/hdfs-find/hdfs-find.h
@@ -0,0 +1,96 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LIBHDFSPP_TOOLS_HDFS_FIND
+#define LIBHDFSPP_TOOLS_HDFS_FIND
+
+#include <string>
+
+#include <boost/program_options.hpp>
+
+#include "hdfs-tool.h"
+
+namespace hdfs::tools {
+/**
+ * {@class Find} is an {@class HdfsTool} that finds all files recursively
+ * starting from the specified PATH and prints their file paths. This tool
+ * mimics the POSIX find.
+ */
+class Find : public HdfsTool {
+public:
+  /**
+   * {@inheritdoc}
+   */
+  Find(int argc, char **argv);
+
+  // Abiding by the Rule of 5
+  Find(const Find &) = default;
+  Find(Find &&) = default;
+  Find &operator=(const Find &) = delete;
+  Find &operator=(Find &&) = delete;
+  ~Find() override = default;
+
+  /**
+   * {@inheritdoc}
+   */
+  [[nodiscard]] std::string GetDescription() const override;
+
+  /**
+   * {@inheritdoc}
+   */
+  [[nodiscard]] bool Do() override;
+
+protected:
+  /**
+   * {@inheritdoc}
+   */
+  [[nodiscard]] bool Initialize() override;
+
+  /**
+   * {@inheritdoc}
+   */
+  [[nodiscard]] bool ValidateConstraints() const override { return argc_ > 1; }
+
+  /**
+   * {@inheritdoc}
+   */
+  [[nodiscard]] bool HandleHelp() const override;
+
+  /**
+   * Handle the path argument that's passed to this tool.
+   *
+   * @param path The path to the directory to begin the find.
+   * @param name The pattern name of the search term.
+   * @param max_depth The maximum depth of the traversal while searching through
+   * the folders.
+   *
+   * @return A boolean indicating the result of this operation.
+   */
+  [[nodiscard]] virtual bool HandlePath(const std::string &path,
+                                        const std::string &name,
+                                        uint32_t max_depth) const;
+
+private:
+  /**
+   * A boost data-structure containing the description of positional arguments
+   * passed to the command-line.
+   */
+  po::positional_options_description pos_opt_desc_;
+};
+} // namespace hdfs::tools
+#endif
diff --git a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tools/hdfs-find/main.cc b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tools/hdfs-find/main.cc
new file mode 100644
index 0000000..1f63aa7
--- /dev/null
+++ b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tools/hdfs-find/main.cc
@@ -0,0 +1,52 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <cstdlib>
+#include <exception>
+#include <iostream>
+
+#include <google/protobuf/stubs/common.h>
+
+#include "hdfs-find.h"
+
+int main(int argc, char *argv[]) {
+  // Clean up static data on exit and prevent valgrind memory leaks
+  const auto cleanup = []() -> void {
+    google::protobuf::ShutdownProtobufLibrary();
+  };
+  if (std::atexit(cleanup) != 0) {
+    std::cerr
+        << "Error: Unable to schedule clean-up tasks for HDFS find tool, exiting"
+        << std::endl;
+    std::exit(EXIT_FAILURE);
+  }
+
+  hdfs::tools::Find find_tool(argc, argv);
+  auto succeeded = false;
+
+  try {
+    succeeded = find_tool.Do();
+  } catch (const std::exception &error) {
+    std::cerr << "Error: " << error.what() << std::endl;
+  }
+
+  if (!succeeded) {
+    std::exit(EXIT_FAILURE);
+  }
+  return 0;
+}
diff --git a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tools/hdfs_find.cc b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tools/hdfs_find.cc
deleted file mode 100644
index 348f851..0000000
--- a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tools/hdfs_find.cc
+++ /dev/null
@@ -1,146 +0,0 @@
-/*
-  Licensed to the Apache Software Foundation (ASF) under one
-  or more contributor license agreements.  See the NOTICE file
-  distributed with this work for additional information
-  regarding copyright ownership.  The ASF licenses this file
-  to you under the Apache License, Version 2.0 (the
-  "License"); you may not use this file except in compliance
-  with the License.  You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-  Unless required by applicable law or agreed to in writing,
-  software distributed under the License is distributed on an
-  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-  KIND, either express or implied.  See the License for the
-  specific language governing permissions and limitations
-  under the License.
-*/
-
-#include <google/protobuf/stubs/common.h>
-#include <unistd.h>
-#include <future>
-#include "tools_common.h"
-
-void usage(){
-  std::cout << "Usage: hdfs_find [OPTION] PATH"
-      << std::endl
-      << std::endl << "Finds all files recursively starting from the"
-      << std::endl << "specified PATH and prints their file paths."
-      << std::endl << "This hdfs_find tool mimics the POSIX find."
-      << std::endl
-      << std::endl << "Both PATH and NAME can have wild-cards."
-      << std::endl
-      << std::endl << "  -n NAME       if provided all results will be matching the NAME pattern"
-      << std::endl << "                otherwise, the implicit '*' will be used"
-      << std::endl << "                NAME allows wild-cards"
-      << std::endl
-      << std::endl << "  -m MAX_DEPTH  if provided the maximum depth to recurse after the end of"
-      << std::endl << "                the path is reached will be limited by MAX_DEPTH"
-      << std::endl << "                otherwise, the maximum depth to recurse is unbound"
-      << std::endl << "                MAX_DEPTH can be set to 0 for pure globbing and ignoring"
-      << std::endl << "                the NAME option (no recursion after the end of the path)"
-      << std::endl
-      << std::endl << "  -h            display this help and exit"
-      << std::endl
-      << std::endl << "Examples:"
-      << std::endl << "hdfs_find hdfs://localhost.localdomain:8020/dir?/tree* -n some?file*name"
-      << std::endl << "hdfs_find / -n file_name -m 3"
-      << std::endl;
-}
-
-int main(int argc, char *argv[]) {
-  //We should have at least 2 arguments
-  if (argc < 2) {
-    usage();
-    exit(EXIT_FAILURE);
-  }
-
-  int input;
-  //If NAME is not specified we use implicit "*"
-  std::string name = "*";
-  //If MAX_DEPTH is not specified we use the max value of uint_32_t
-  uint32_t max_depth = hdfs::FileSystem::GetDefaultFindMaxDepth();
-
-  //Using GetOpt to read in the values
-  opterr = 0;
-  while ((input = getopt(argc, argv, "hn:m:")) != -1) {
-    switch (input)
-    {
-    case 'h':
-      usage();
-      exit(EXIT_SUCCESS);
-    case 'n':
-      name = optarg;
-      break;
-    case 'm':
-      max_depth = std::stoi(optarg);
-      break;
-    case '?':
-      if (optopt == 'n' || optopt == 'm')
-        std::cerr << "Option -" << (char) optopt << " requires an argument." << std::endl;
-      else if (isprint(optopt))
-        std::cerr << "Unknown option `-" << (char) optopt << "'." << std::endl;
-      else
-        std::cerr << "Unknown option character `" << (char) optopt << "'." << std::endl;
-      usage();
-      exit(EXIT_FAILURE);
-    default:
-      exit(EXIT_FAILURE);
-    }
-  }
-  std::string uri_path = argv[optind];
-
-  //Building a URI object from the given uri_path
-  hdfs::URI uri = hdfs::parse_path_or_exit(uri_path);
-
-  std::shared_ptr<hdfs::FileSystem> fs = hdfs::doConnect(uri, true);
-  if (!fs) {
-    std::cerr << "Could not connect the file system. " << std::endl;
-    exit(EXIT_FAILURE);
-  }
-
-  std::shared_ptr<std::promise<void>> promise = std::make_shared<std::promise<void>>();
-  std::future<void> future(promise->get_future());
-  hdfs::Status status = hdfs::Status::OK();
-
-  /**
-    * Keep requesting more until we get the entire listing. Set the promise
-    * when we have the entire listing to stop.
-    *
-    * Find guarantees that the handler will only be called once at a time,
-    * so we do not need any locking here. It also guarantees that the handler will be
-    * only called once with has_more_results set to false.
-    */
-  auto handler = [promise, &status]
-                  (const hdfs::Status &s, const std::vector<hdfs::StatInfo> & si, bool has_more_results) -> bool {
-    //Print result chunks as they arrive
-    if(!si.empty()) {
-      for (hdfs::StatInfo const& s : si) {
-        std::cout << s.str() << std::endl;
-      }
-    }
-    if(!s.ok() && status.ok()){
-      //We make sure we set 'status' only on the first error.
-      status = s;
-    }
-    if (!has_more_results) {
-      promise->set_value();  //set promise
-      return false;         //request stop sending results
-    }
-    return true;  //request more results
-  };
-
-  //Asynchronous call to Find
-  fs->Find(uri.get_path(), name, max_depth, handler);
-
-  //block until promise is set
-  future.get();
-  if(!status.ok()) {
-    std::cerr << "Error: " << status.ToString() << std::endl;
-  }
-
-  // Clean up static data and prevent valgrind memory leaks
-  google::protobuf::ShutdownProtobufLibrary();
-  return 0;
-}

---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org