You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ko...@apache.org on 2022/08/12 05:05:09 UTC
[arrow] branch master updated: ARROW-17370: [C++] Add limit to SplitString() (#13833)
This is an automated email from the ASF dual-hosted git repository.
kou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new a70908dc3c ARROW-17370: [C++] Add limit to SplitString() (#13833)
a70908dc3c is described below
commit a70908dc3cada0b3a7bc1fd06f7fa6982b4b1160
Author: Sutou Kouhei <ko...@clear-code.com>
AuthorDate: Fri Aug 12 14:05:03 2022 +0900
ARROW-17370: [C++] Add limit to SplitString() (#13833)
Authored-by: Sutou Kouhei <ko...@clear-code.com>
Signed-off-by: Sutou Kouhei <ko...@clear-code.com>
---
cpp/src/arrow/util/string.cc | 9 +++++++--
cpp/src/arrow/util/string.h | 3 ++-
cpp/src/arrow/util/string_test.cc | 26 ++++++++++++++++++++++++++
3 files changed, 35 insertions(+), 3 deletions(-)
diff --git a/cpp/src/arrow/util/string.cc b/cpp/src/arrow/util/string.cc
index 3a15860055..00ab8e64c4 100644
--- a/cpp/src/arrow/util/string.cc
+++ b/cpp/src/arrow/util/string.cc
@@ -92,11 +92,16 @@ Status ParseHexValue(const char* data, uint8_t* out) {
namespace internal {
-std::vector<util::string_view> SplitString(util::string_view v, char delimiter) {
+std::vector<util::string_view> SplitString(util::string_view v, char delimiter,
+ int64_t limit) {
std::vector<util::string_view> parts;
size_t start = 0, end;
while (true) {
- end = v.find(delimiter, start);
+ if (limit > 0 && static_cast<size_t>(limit - 1) <= parts.size()) {
+ end = std::string::npos;
+ } else {
+ end = v.find(delimiter, start);
+ }
parts.push_back(v.substr(start, end - start));
if (end == std::string::npos) {
break;
diff --git a/cpp/src/arrow/util/string.h b/cpp/src/arrow/util/string.h
index d2c8ac38ee..b2baa0ebed 100644
--- a/cpp/src/arrow/util/string.h
+++ b/cpp/src/arrow/util/string.h
@@ -45,7 +45,8 @@ namespace internal {
/// \brief Split a string with a delimiter
ARROW_EXPORT
-std::vector<util::string_view> SplitString(util::string_view v, char delim);
+std::vector<util::string_view> SplitString(util::string_view v, char delim,
+ int64_t limit = 0);
/// \brief Join strings with a delimiter
ARROW_EXPORT
diff --git a/cpp/src/arrow/util/string_test.cc b/cpp/src/arrow/util/string_test.cc
index 057d885fcd..2aa6fccbd9 100644
--- a/cpp/src/arrow/util/string_test.cc
+++ b/cpp/src/arrow/util/string_test.cc
@@ -140,5 +140,31 @@ TEST(SplitString, OnlyDemiliter) {
EXPECT_EQ(parts[1], "");
}
+TEST(SplitString, Limit) {
+ std::string input = "a:b:c";
+ auto parts = SplitString(input, ':', 2);
+ ASSERT_EQ(parts.size(), 2);
+ EXPECT_EQ(parts[0], "a");
+ EXPECT_EQ(parts[1], "b:c");
+}
+
+TEST(SplitString, LimitOver) {
+ std::string input = "a:b:c";
+ auto parts = SplitString(input, ':', 4);
+ ASSERT_EQ(parts.size(), 3);
+ EXPECT_EQ(parts[0], "a");
+ EXPECT_EQ(parts[1], "b");
+ EXPECT_EQ(parts[2], "c");
+}
+
+TEST(SplitString, LimitZero) {
+ std::string input = "a:b:c";
+ auto parts = SplitString(input, ':', 0);
+ ASSERT_EQ(parts.size(), 3);
+ EXPECT_EQ(parts[0], "a");
+ EXPECT_EQ(parts[1], "b");
+ EXPECT_EQ(parts[2], "c");
+}
+
} // namespace internal
} // namespace arrow