You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ko...@apache.org on 2022/08/12 05:05:09 UTC

[arrow] branch master updated: ARROW-17370: [C++] Add limit to SplitString() (#13833)

This is an automated email from the ASF dual-hosted git repository.

kou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new a70908dc3c ARROW-17370: [C++] Add limit to SplitString() (#13833)
a70908dc3c is described below

commit a70908dc3cada0b3a7bc1fd06f7fa6982b4b1160
Author: Sutou Kouhei <ko...@clear-code.com>
AuthorDate: Fri Aug 12 14:05:03 2022 +0900

    ARROW-17370: [C++] Add limit to SplitString() (#13833)
    
    Authored-by: Sutou Kouhei <ko...@clear-code.com>
    Signed-off-by: Sutou Kouhei <ko...@clear-code.com>
---
 cpp/src/arrow/util/string.cc      |  9 +++++++--
 cpp/src/arrow/util/string.h       |  3 ++-
 cpp/src/arrow/util/string_test.cc | 26 ++++++++++++++++++++++++++
 3 files changed, 35 insertions(+), 3 deletions(-)

diff --git a/cpp/src/arrow/util/string.cc b/cpp/src/arrow/util/string.cc
index 3a15860055..00ab8e64c4 100644
--- a/cpp/src/arrow/util/string.cc
+++ b/cpp/src/arrow/util/string.cc
@@ -92,11 +92,16 @@ Status ParseHexValue(const char* data, uint8_t* out) {
 
 namespace internal {
 
-std::vector<util::string_view> SplitString(util::string_view v, char delimiter) {
+std::vector<util::string_view> SplitString(util::string_view v, char delimiter,
+                                           int64_t limit) {
   std::vector<util::string_view> parts;
   size_t start = 0, end;
   while (true) {
-    end = v.find(delimiter, start);
+    if (limit > 0 && static_cast<size_t>(limit - 1) <= parts.size()) {
+      end = std::string::npos;
+    } else {
+      end = v.find(delimiter, start);
+    }
     parts.push_back(v.substr(start, end - start));
     if (end == std::string::npos) {
       break;
diff --git a/cpp/src/arrow/util/string.h b/cpp/src/arrow/util/string.h
index d2c8ac38ee..b2baa0ebed 100644
--- a/cpp/src/arrow/util/string.h
+++ b/cpp/src/arrow/util/string.h
@@ -45,7 +45,8 @@ namespace internal {
 
 /// \brief Split a string with a delimiter
 ARROW_EXPORT
-std::vector<util::string_view> SplitString(util::string_view v, char delim);
+std::vector<util::string_view> SplitString(util::string_view v, char delim,
+                                           int64_t limit = 0);
 
 /// \brief Join strings with a delimiter
 ARROW_EXPORT
diff --git a/cpp/src/arrow/util/string_test.cc b/cpp/src/arrow/util/string_test.cc
index 057d885fcd..2aa6fccbd9 100644
--- a/cpp/src/arrow/util/string_test.cc
+++ b/cpp/src/arrow/util/string_test.cc
@@ -140,5 +140,31 @@ TEST(SplitString, OnlyDemiliter) {
   EXPECT_EQ(parts[1], "");
 }
 
+TEST(SplitString, Limit) {  // limit smaller than the number of fields
+  std::string input = "a:b:c";
+  auto parts = SplitString(input, ':', 2);  // ask for at most 2 parts
+  ASSERT_EQ(parts.size(), 2);
+  EXPECT_EQ(parts[0], "a");
+  EXPECT_EQ(parts[1], "b:c");  // last part keeps the unsplit remainder
+}
+
+TEST(SplitString, LimitOver) {  // limit larger than the number of fields
+  std::string input = "a:b:c";
+  auto parts = SplitString(input, ':', 4);  // only 3 fields are available
+  ASSERT_EQ(parts.size(), 3);  // expects every field, split at each ':'
+  EXPECT_EQ(parts[0], "a");
+  EXPECT_EQ(parts[1], "b");
+  EXPECT_EQ(parts[2], "c");
+}
+
+TEST(SplitString, LimitZero) {  // explicit limit of 0
+  std::string input = "a:b:c";
+  auto parts = SplitString(input, ':', 0);  // 0 is not treated as a cap
+  ASSERT_EQ(parts.size(), 3);  // expects a full split at each ':'
+  EXPECT_EQ(parts[0], "a");
+  EXPECT_EQ(parts[1], "b");
+  EXPECT_EQ(parts[2], "c");
+}
+
 }  // namespace internal
 }  // namespace arrow