You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@nifi.apache.org by GitBox <gi...@apache.org> on 2022/09/09 09:10:06 UTC

[GitHub] [nifi-minifi-cpp] adamdebreceni commented on a diff in pull request #1387: MINIFICPP-1903 - Take advantage of more optimal regex methods on supported platforms

adamdebreceni commented on code in PR #1387:
URL: https://github.com/apache/nifi-minifi-cpp/pull/1387#discussion_r966812547


##########
libminifi/src/utils/RegexUtils.cpp:
##########
@@ -22,58 +22,86 @@
 #include <vector>
 
 #include "Exception.h"
+#include <regex.h>
+
+namespace org::apache::nifi::minifi::utils {
 
 #ifndef NO_MORE_REGFREEE
-namespace {
 
-std::size_t getMaxGroupCountOfRegex(const std::string& regex) {
-  return std::count(regex.begin(), regex.end(), '(') + 1;
+SMatch::SMatch(const SMatch& other) {
+  *this = other;
 }
 
-}  // namespace
-#endif
-
-namespace org::apache::nifi::minifi::utils {
+SMatch::SMatch(SMatch&& other) {
+  *this = std::move(other);
+}
 
-#ifndef NO_MORE_REGFREEE
-SMatch::SuffixWrapper SMatch::suffix() const {
-  if ((size_t) matches_[0].match.rm_eo >= string_.size()) {
-    return SuffixWrapper{std::string()};
-  } else {
-    return SuffixWrapper{string_.substr(matches_[0].match.rm_eo)};
+SMatch& SMatch::operator=(const SMatch& other) {
+  if (this == &other) {
+    return *this;
+  }
+  reset(other.string_);
+  matches_.reserve(other.matches_.size());
+  ready_ = other.ready_;
+  for (const auto& sub_match : other.matches_) {
+    size_t begin_off = gsl::narrow<size_t>(std::distance(other.string_.begin(), sub_match.first));
+    size_t end_off = gsl::narrow<size_t>(std::distance(other.string_.begin(), sub_match.second));
+    matches_.push_back(Regmatch{sub_match.matched, string_.begin() + begin_off, string_.begin() + end_off});
   }
+  return *this;
+}
+
+SMatch& SMatch::operator=(SMatch&& other) {
+  // trigger the copy assignment, we could optimize this (by moving the string/matches)
+  // but we would need to maintain a separate offsets vector, as after the move the original
+  // sub_matches' iterators are invalidated, if this turns out to be a performance bottleneck
+  // revisit this
+  return *this = other;
+}

Review Comment:
   An `SMatch` object would still have to contain a reference to the stored string, which would need to be updated on copy/move.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscribe@nifi.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org