You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by zh...@apache.org on 2020/01/17 16:05:09 UTC

[incubator-doris] branch master updated: Add path util (#2747)

This is an automated email from the ASF dual-hosted git repository.

zhaoc pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-doris.git


The following commit(s) were added to refs/heads/master by this push:
     new c71eefa  Add path util (#2747)
c71eefa is described below

commit c71eefa2acdaf4770055cc7b329019e26b81da49
Author: LingBin <li...@gmail.com>
AuthorDate: Sat Jan 18 00:05:00 2020 +0800

    Add path util (#2747)
    
    Note that the methods in path_util only perform path-string processing
    and do not involve any file or I/O operations.
    
    An upcoming patch will use these utility methods to move operations such
    as the concatenation of directory strings out of the processing logic.
---
 be/src/util/CMakeLists.txt      |  1 +
 be/src/util/path_util.cpp       | 84 +++++++++++++++++++++++++++++++++++++++
 be/src/util/path_util.h         | 58 +++++++++++++++++++++++++++
 be/test/util/CMakeLists.txt     |  2 +
 be/test/util/path_util_test.cpp | 88 +++++++++++++++++++++++++++++++++++++++++
 run-ut.sh                       |  1 +
 6 files changed, 234 insertions(+)

diff --git a/be/src/util/CMakeLists.txt b/be/src/util/CMakeLists.txt
index dccdded..2159056 100644
--- a/be/src/util/CMakeLists.txt
+++ b/be/src/util/CMakeLists.txt
@@ -86,6 +86,7 @@ set(UTIL_FILES
   zip_util.cpp        
   utf8_check.cpp
   cgroup_util.cpp
+  path_util.cpp
 )
 
 if (WITH_MYSQL)
diff --git a/be/src/util/path_util.cpp b/be/src/util/path_util.cpp
new file mode 100644
index 0000000..e9eb482
--- /dev/null
+++ b/be/src/util/path_util.cpp
@@ -0,0 +1,84 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "util/path_util.h"
+
+#include <cstring>
+#include <memory>
+// Use the POSIX version of dirname(3). See `man 3 dirname`
+#include <libgen.h>
+
+#include "common/logging.h"
+#include "gutil/strings/split.h"
+#include "gutil/strings/stringpiece.h"
+#include "gutil/strings/strip.h"
+
+using std::string;
+using std::vector;
+using strings::SkipEmpty;
+using strings::Split;
+
+namespace doris {
+namespace path_util {
+
+// Definition of the kTmpInfix constant declared in path_util.h.
+// NOTE(review): presumably the infix used when naming temporary files; nothing
+// in this file reads it, so confirm the intended use against its callers.
+const string kTmpInfix = ".doristmp";
+
+// Joins two path segments with a "/" separator.
+//
+// If either segment is empty the other one is returned unchanged. Otherwise a
+// trailing "/" on |a| and a leading "/" on |b| are stripped before the two are
+// glued together, so that ("a/", "/b") yields "a/b" rather than "a///b".
+string join_path_segments(const string& a, const string& b) {
+    // An empty side contributes nothing; hand back the other side verbatim.
+    if (a.empty()) {
+        return b;
+    }
+    if (b.empty()) {
+        return a;
+    }
+    // Trim the separator on both sides of the junction, then re-insert it once.
+    const string head = StripSuffixString(a, "/");
+    const string tail = StripPrefixString(b, "/");
+    return head + "/" + tail;
+}
+
+// Appends the common suffix segment |s| to every path in |v|.
+//
+// Returns a vector of the same length as |v| whose i-th element is
+// join_path_segments(v[i], s); the order of |v| is preserved.
+vector<string> join_path_segments_v(const vector<string>& v, const string& s) {
+    vector<string> out;
+    // Exactly one output entry is produced per input entry, so allocate the
+    // storage once instead of growing the vector repeatedly inside the loop.
+    out.reserve(v.size());
+    for (const string& path : v) {
+        out.emplace_back(join_path_segments(path, s));
+    }
+    return out;
+}
+
+// Breaks |path| into its "/"-separated segments.
+//
+// An empty path yields an empty vector. When |path| starts with "/", the root
+// is reported as a dedicated first segment "/". Empty pieces (caused by
+// leading, trailing or repeated separators) are skipped, so "/a//b/" yields
+// {"/", "a", "b"}.
+vector<string> split_path(const string& path) {
+    vector<string> segments;
+    if (path.empty()) {
+        return segments;
+    }
+    // Keep the root of an absolute path as its own segment; the split below
+    // would otherwise discard it together with the other empty pieces.
+    if (path.front() == '/') {
+        segments.emplace_back("/");
+    }
+    vector<StringPiece> components = Split(path, "/", SkipEmpty());
+    for (const StringPiece& component : components) {
+        // StringPiece is not NUL-terminated, so copy by explicit length.
+        segments.emplace_back(component.data(), component.size());
+    }
+    return segments;
+}
+
+// Returns the enclosing directory of |path|, as computed by POSIX dirname(3).
+// |path| itself is never modified.
+string dir_name(const string& path) {
+    // dirname(3) is allowed to modify the buffer it is given, so it must work
+    // on a private, mutable copy. The copy is a std::string rather than a
+    // strdup() buffer held in std::unique_ptr<char[]>: that combination frees
+    // malloc()-allocated memory with delete[], which is undefined behavior.
+    string path_copy(path);
+    // The returned pointer may refer to path_copy or to static storage; the
+    // result string is constructed from it before path_copy is destroyed.
+    return dirname(&path_copy[0]);
+}
+
+// Returns the terminal component of |path|, as computed by the POSIX
+// basename(3) that <libgen.h> provides. |path| itself is never modified.
+string base_name(const string& path) {
+    // POSIX basename(3) is allowed to modify the buffer it is given, so it
+    // must work on a private, mutable copy. The copy is a std::string rather
+    // than a strdup() buffer held in std::unique_ptr<char[]>: that combination
+    // frees malloc()-allocated memory with delete[], which is undefined
+    // behavior.
+    string path_copy(path);
+    // The returned pointer may refer to path_copy or to static storage; the
+    // result string is constructed from it before path_copy is destroyed.
+    return basename(&path_copy[0]);
+}
+
+} // namespace path_util
+} // namespace doris
diff --git a/be/src/util/path_util.h b/be/src/util/path_util.h
new file mode 100644
index 0000000..b1e5f1a
--- /dev/null
+++ b/be/src/util/path_util.h
@@ -0,0 +1,58 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+//
+// Utility methods for dealing with file paths.
+#pragma once
+
+#include <string>
+#include <vector>
+
+namespace doris {
+namespace path_util {
+
+// NOTE: The functions here only manipulate path strings; they do not involve
+// any file or I/O operations.
+//
+// kTmpInfix is defined as ".doristmp" in path_util.cpp.
+// NOTE(review): presumably the infix used when naming temporary files --
+// confirm against its callers.
+extern const std::string kTmpInfix;
+
+// Join two path segments with the appropriate path separator, if necessary.
+// If either segment is empty, the other one is returned unchanged. Otherwise
+// a trailing "/" on |a| and a leading "/" on |b| are stripped before joining:
+//   ("a", "b")    -> "a/b"
+//   ("a/", "/b")  -> "a/b"
+//   ("a", "")     -> "a"
+std::string join_path_segments(const std::string& a, const std::string& b);
+
+// Join each path segment in a list with a common suffix segment.
+// The result has one entry per element of |v|, in the same order, each being
+// join_path_segments(element, s).
+std::vector<std::string> join_path_segments_v(const std::vector<std::string>& v,
+                                              const std::string& s);
+
+// Split a path into segments with the appropriate path separator.
+// An empty path yields an empty vector. A leading "/" is returned as its own
+// first segment, and empty segments are skipped:
+//   "/a//b/"  -> {"/", "a", "b"}
+//   "a/b"     -> {"a", "b"}
+std::vector<std::string> split_path(const std::string& path);
+
+// Return the enclosing directory of path.
+// This is like dirname(3) but for C++ strings.
+// The following list of examples shows the strings returned by dirname() and basename():
+//   path         dirname    basename
+//   "/usr/lib"    "/usr"    "lib"
+//   "/usr/"       "/"       "usr"
+//   "usr"         "."       "usr"
+//   "/"           "/"       "/"
+//   "."           "."       "."
+//   ".."          "."       ".."
+//   ""            "."       "."
+std::string dir_name(const std::string& path);
+
+// Return the terminal component of a path.
+// This is like basename(3) but for C++ strings; see the table above dir_name()
+// for examples.
+std::string base_name(const std::string& path);
+
+} // namespace path_util
+} // namespace doris
diff --git a/be/test/util/CMakeLists.txt b/be/test/util/CMakeLists.txt
index bf2f38b..3154de5 100644
--- a/be/test/util/CMakeLists.txt
+++ b/be/test/util/CMakeLists.txt
@@ -56,3 +56,5 @@ ADD_BE_TEST(radix_sort_test)
 ADD_BE_TEST(zip_util_test)
 ADD_BE_TEST(utf8_check_test)
 ADD_BE_TEST(cgroup_util_test)
+ADD_BE_TEST(path_util_test)
+
diff --git a/be/test/util/path_util_test.cpp b/be/test/util/path_util_test.cpp
new file mode 100644
index 0000000..7cf1048
--- /dev/null
+++ b/be/test/util/path_util_test.cpp
@@ -0,0 +1,88 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "util/path_util.h"
+
+#include <string>
+#include <vector>
+
+#include <gtest/gtest.h>
+
+#include "common/config.h"
+#include "util/logging.h"
+
+using std::string;
+using std::vector;
+
+namespace doris {
+
+// Checks join_path_segments() with an empty operand on either side and with
+// every combination of a trailing "/" on the left and a leading "/" on the
+// right.
+TEST(TestPathUtil, JoinPathSegments) {
+    struct Case {
+        const char* lhs;
+        const char* rhs;
+        const char* joined;
+    };
+    const Case kCases[] = {
+        {"a", "", "a"},
+        {"", "b", "b"},
+        {"a", "b", "a/b"},
+        {"a/", "b", "a/b"},
+        {"a", "/b", "a/b"},
+        {"a/", "/b", "a/b"},
+    };
+    for (const Case& c : kCases) {
+        ASSERT_EQ(c.joined, path_util::join_path_segments(c.lhs, c.rhs));
+    }
+}
+
+// Checks base_name() against a table of inputs covering the empty string,
+// "." and "..", the root, relative and absolute paths, and trailing slashes.
+TEST(TestPathUtil, BaseNameTest) {
+    struct Case {
+        const char* path;
+        const char* expected;
+    };
+    const Case kCases[] = {
+        {"", "."},
+        {".", "."},
+        {"..", ".."},
+        {"/", "/"},
+        {"//", "/"},
+        {"a", "a"},
+        {"ab", "ab"},
+        {"ab/", "ab"},
+        {"ab/cd", "cd"},
+        {"/ab", "ab"},
+        {"/ab///", "ab"},
+        {"/ab/cd", "cd"},
+    };
+    for (const Case& c : kCases) {
+        ASSERT_EQ(c.expected, path_util::base_name(c.path));
+    }
+}
+
+// Checks dir_name() against a table of inputs covering the empty string,
+// "." and "..", the root, relative and absolute paths, and trailing slashes.
+TEST(TestPathUtil, DirNameTest) {
+    struct Case {
+        const char* path;
+        const char* expected;
+    };
+    const Case kCases[] = {
+        {"", "."},
+        {".", "."},
+        {"..", "."},
+        {"/", "/"},
+        {"//", "//"},
+        {"a", "."},
+        {"ab", "."},
+        {"ab/", "."},
+        {"ab/cd", "ab"},
+        {"/ab", "/"},
+        {"/ab///", "/"},
+        {"/ab/cd", "/ab"},
+    };
+    for (const Case& c : kCases) {
+        ASSERT_EQ(c.expected, path_util::dir_name(c.path));
+    }
+}
+
+// Checks split_path() for absolute and relative paths, trailing and repeated
+// separators, a lone "." and the empty string.
+TEST(TestPathUtil, SplitPathTest) {
+    using Vec = vector<string>;
+    struct Case {
+        const char* path;
+        Vec segments;
+    };
+    const Case kCases[] = {
+        {"/", {"/"}},
+        {"/a/b", {"/", "a", "b"}},
+        {"/a/b/", {"/", "a", "b"}},
+        {"/a//b/", {"/", "a", "b"}},
+        {"a/b", {"a", "b"}},
+        {".", {"."}},
+        {"", {}},
+    };
+    for (const Case& c : kCases) {
+        ASSERT_EQ(c.segments, path_util::split_path(c.path));
+    }
+}
+
+} // namespace doris
+
+// Test entry point: passes the command line to googletest, runs every
+// registered test and returns the value of RUN_ALL_TESTS() as the exit code.
+int main(int argc, char* argv[]) {
+    ::testing::InitGoogleTest(&argc, argv);
+    const int exit_code = RUN_ALL_TESTS();
+    return exit_code;
+}
diff --git a/run-ut.sh b/run-ut.sh
index bd07724..297283d 100755
--- a/run-ut.sh
+++ b/run-ut.sh
@@ -167,6 +167,7 @@ ${DORIS_TEST_BINARY_DIR}/util/frame_of_reference_coding_test
 ${DORIS_TEST_BINARY_DIR}/util/zip_util_test
 ${DORIS_TEST_BINARY_DIR}/util/utf8_check_test
 ${DORIS_TEST_BINARY_DIR}/util/cgroup_util_test
+${DORIS_TEST_BINARY_DIR}/util/path_util_test
 
 # Running common Unittest
 ${DORIS_TEST_BINARY_DIR}/common/resource_tls_test


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org