You are viewing a plain text version of this content. The canonical link for it is here.
Posted to github@arrow.apache.org by GitBox <gi...@apache.org> on 2022/11/09 16:53:56 UTC

[GitHub] [arrow] bkietz commented on a diff in pull request #14602: ARROW-18270: [Python] Remove gcc 4.9 compatibility code

bkietz commented on code in PR #14602:
URL: https://github.com/apache/arrow/pull/14602#discussion_r1018189597


##########
python/pyarrow/src/arrow/python/datetime.cc:
##########
@@ -38,41 +39,27 @@ namespace internal {
 
 namespace {
 
-// Same as Regex '([+-])(0[0-9]|1[0-9]|2[0-3]):([0-5][0-9])$'.
-// GCC 4.9 doesn't support regex, so handcode until support for it
-// is dropped.
 bool MatchFixedOffset(const std::string& tz, std::string_view* sign,
                       std::string_view* hour, std::string_view* minute) {
+  static const std::regex regex("^([+-])(0[0-9]|1[0-9]|2[0-3]):([0-5][0-9])$");
   if (tz.size() < 5) {
     return false;
   }
-  const char* iter = tz.data();
-  if (*iter == '+' || *iter == '-') {
-    *sign = std::string_view(iter, 1);
-    iter++;
-    if (tz.size() < 6) {
-      return false;
-    }
-  }
-  if ((((*iter == '0' || *iter == '1') && *(iter + 1) >= '0' && *(iter + 1) <= '9') ||
-       (*iter == '2' && *(iter + 1) >= '0' && *(iter + 1) <= '3'))) {
-    *hour = std::string_view(iter, 2);
-    iter += 2;
-  } else {
-    return false;
-  }
-  if (*iter != ':') {
+  std::smatch match;
+  if (!std::regex_match(tz, match, regex)) {
     return false;
   }
-  iter++;
+  DCHECK_EQ(match.size(), 4);  // match #0 is the whole matched sequence
 
-  if (*iter >= '0' && *iter <= '5' && *(iter + 1) >= '0' && *(iter + 1) <= '9') {
-    *minute = std::string_view(iter, 2);
-    iter += 2;
-  } else {
-    return false;
-  }
-  return iter == (tz.data() + tz.size());
+  auto match_view = [&](int match_number) {
+    return std::string_view(&tz[match.position(match_number)],
+                            match.length(match_number));
+  };

Review Comment:
   I can't improve on this by much. `std::regex` really expects you to want a string.
   
   ```c++
   bool MatchFixedOffset(std::string_view tz, std::string_view* sign,
                         std::string_view* hour, std::string_view* minute) {
     static const std::regex regex("^([+-])(0[0-9]|1[0-9]|2[0-3]):([0-5][0-9])$");
     if (tz.size() < 5) {
       return false;
     }
   
     std::match_results<decltype(target.begin())> match;
     if (!std::regex_match(tz.begin(), tz.end(), match, regex)) {
       return false;
     }
     DCHECK_EQ(match.size(), 4);  // match #0 is the whole matched sequence
   
     auto match_view = [&](int i) {
       return tz.substr(match.position(i), match.length(i));
     };
     *sign = match_view(1);
     *hour = match_view(2);
     *minute = match_view(3);
     return true;
   }
   ```
   
   OTOH, this might be a common enough problem to make a generic helper function for:
   
   ```c++
   // in util/string.h or util/regex.h
   bool Match(const std::regex& regex, std::string_view target,
                      std::initializer_list<std::string_view*> out_views) {
     std::match_results<decltype(target.begin())> match;
     if (!std::regex_match(target.begin(), target.end(), match, regex)) {
       return false;
     }
     DCHECK_EQ(match.size(), out_views.size() + 1); // match #0 is the whole matched sequence
   
     size_t i = 1;
     for (std::string_view* out_view : out_views) {
       *out_view = target.substr(match.position(i), match.length(i));
       ++i;
     }
     return true;
   }
   
   bool MatchFixedOffset(const std::string& tz, std::string_view* sign,
                         std::string_view* hour, std::string_view* minute) {
     if (tz.size() < 5) {
       return false;
     }
     static const std::regex regex("^([+-])(0[0-9]|1[0-9]|2[0-3]):([0-5][0-9])$");
     return Match(tz, regex, {sign, hour, minute});
   }
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org