You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@trafficserver.apache.org by wk...@apache.org on 2024/04/15 14:58:38 UTC
(trafficserver) branch master updated: Avoid including pcre2.h in Regex.h. (#11246)
This is an automated email from the ASF dual-hosted git repository.
wkaras pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/trafficserver.git
The following commit(s) were added to refs/heads/master by this push:
new 08506ef708 Avoid including pcre2.h in Regex.h. (#11246)
08506ef708 is described below
commit 08506ef708b9bb05c6254be1a521f7d60f12e1cf
Author: Walt Karas <wk...@yahooinc.com>
AuthorDate: Mon Apr 15 10:58:32 2024 -0400
Avoid including pcre2.h in Regex.h. (#11246)
---
include/tsutil/Regex.h | 46 ++++++++++++--------
src/tsutil/Regex.cc | 115 +++++++++++++++++++++++++++----------------------
2 files changed, 93 insertions(+), 68 deletions(-)
diff --git a/include/tsutil/Regex.h b/include/tsutil/Regex.h
index c4ca8feb03..44e10d53b2 100644
--- a/include/tsutil/Regex.h
+++ b/include/tsutil/Regex.h
@@ -28,14 +28,14 @@
#include <vector>
#include <memory>
-#define PCRE2_CODE_UNIT_WIDTH 8
-#include <pcre2.h>
-
/// @brief Match flags for regular expression evaluation.
+///
+/// @internal These values are copied from pcre2.h, to avoid having to include it. The values are checked (with
+/// static_assert) in Regex.cc against PCRE2 named constants, in case they change in future PCRE2 releases.
enum REFlags {
- RE_CASE_INSENSITIVE = PCRE2_CASELESS, ///< Ignore case (default: case sensitive).
- RE_UNANCHORED = PCRE2_MULTILINE, ///< Unanchored (DFA defaults to anchored).
- RE_ANCHORED = PCRE2_ANCHORED, ///< Anchored (Regex defaults to unanchored).
+ RE_CASE_INSENSITIVE = 0x00000008u, ///< Ignore case (default: case sensitive).
+ RE_UNANCHORED = 0x00000400u, ///< Unanchored (DFA defaults to anchored).
+ RE_ANCHORED = 0x80000000u, ///< Anchored (Regex defaults to unanchored).
};
/// @brief Wrapper for PCRE2 match data.
@@ -63,19 +63,25 @@ public:
size_t *get_ovector_pointer();
int32_t size() const;
-protected:
- pcre2_match_data *get_match_data();
- void set_subject(std::string_view subject);
- void set_size(int32_t size);
-
private:
constexpr static uint32_t DEFAULT_MATCHES = 10;
static void *malloc(size_t size, void *caller);
- pcre2_match_data *_match_data = nullptr;
std::string_view _subject;
char _buffer[24 + 96 + 16 * DEFAULT_MATCHES]; // 24 bytes for the general context, 96 bytes overhead, 16 bytes per match.
size_t _buffer_bytes_used = 0;
int32_t _size = 0;
+
+ /// @internal This effectively wraps a void* so that we can avoid requiring the pcre2.h include for the user of the Regex
+ /// API (see Regex.cc).
+ struct _MatchData;
+ class _MatchDataPtr
+ {
+ friend struct _MatchData;
+
+ private:
+ void *_ptr = nullptr;
+ };
+ _MatchDataPtr _match_data;
};
/// @brief Wrapper for PCRE2 regular expression.
@@ -135,11 +141,17 @@ public:
int get_capture_count();
private:
- // @internal - Because the PCRE header is badly done, we can't forward declare the PCRE
- // enough to use as pointers. For some reason the header defines in name only a struct and
- // then aliases it to the standard name, rather than simply declare the latter in name only.
- // The goal is completely wrap PCRE and not include that header in client code.
- pcre2_code *_code = nullptr;
+ /// @internal This effectively wraps a void* so that we can avoid requiring the pcre2.h include for the user of the Regex
+ /// API (see Regex.cc).
+ struct _Code;
+ class _CodePtr
+ {
+ friend struct _Code;
+
+ private:
+ void *_ptr = nullptr;
+ };
+ _CodePtr _code;
};
/** Deterministic Finite state Automata container.
diff --git a/src/tsutil/Regex.cc b/src/tsutil/Regex.cc
index 37e4b4337b..fb2cddbb2e 100644
--- a/src/tsutil/Regex.cc
+++ b/src/tsutil/Regex.cc
@@ -23,11 +23,18 @@
#include "tsutil/Regex.h"
+#define PCRE2_CODE_UNIT_WIDTH 8
+#include <pcre2.h>
+
#include <array>
#include <assert.h>
#include <vector>
#include <mutex>
+static_assert(RE_CASE_INSENSITIVE == PCRE2_CASELESS, "Update RE_CASE_INSERSITIVE for current PCRE2 version.");
+static_assert(RE_UNANCHORED == PCRE2_MULTILINE, "Update RE_MULTILINE for current PCRE2 version.");
+static_assert(RE_ANCHORED == PCRE2_ANCHORED, "Update RE_ANCHORED for current PCRE2 version.");
+
//----------------------------------------------------------------------------
namespace
{
@@ -145,13 +152,27 @@ RegexContextCleanup::push_back(RegexContext *ctx)
} // namespace
+//----------------------------------------------------------------------------
+struct RegexMatches::_MatchData {
+ static pcre2_match_data *
+ get(_MatchDataPtr const &p)
+ {
+ return static_cast<pcre2_match_data *>(p._ptr);
+ }
+ static void
+ set(_MatchDataPtr &p, pcre2_match_data *ptr)
+ {
+ p._ptr = ptr;
+ }
+};
+
//----------------------------------------------------------------------------
RegexMatches::RegexMatches(uint32_t size)
{
pcre2_general_context *ctx = pcre2_general_context_create(
&RegexMatches::malloc, [](void *, void *) -> void {}, static_cast<void *>(this));
- _match_data = pcre2_match_data_create(size, ctx);
+ _MatchData::set(_match_data, pcre2_match_data_create(size, ctx));
}
//----------------------------------------------------------------------------
@@ -175,8 +196,9 @@ RegexMatches::malloc(size_t size, void *caller)
//----------------------------------------------------------------------------
RegexMatches::~RegexMatches()
{
- if (_match_data != nullptr) {
- pcre2_match_data_free(_match_data);
+ auto ptr = _MatchData::get(_match_data);
+ if (ptr != nullptr) {
+ pcre2_match_data_free(ptr);
}
}
@@ -184,7 +206,7 @@ RegexMatches::~RegexMatches()
size_t *
RegexMatches::get_ovector_pointer()
{
- return pcre2_get_ovector_pointer(_match_data);
+ return pcre2_get_ovector_pointer(_MatchData::get(_match_data));
}
//----------------------------------------------------------------------------
@@ -194,52 +216,46 @@ RegexMatches::size() const
return _size;
}
-//----------------------------------------------------------------------------
-pcre2_match_data *
-RegexMatches::get_match_data()
-{
- return _match_data;
-}
-
-//----------------------------------------------------------------------------
-void
-RegexMatches::set_size(int32_t size)
-{
- _size = size;
-}
-
-//----------------------------------------------------------------------------
-void
-RegexMatches::set_subject(std::string_view subject)
-{
- _subject = subject;
-}
-
//----------------------------------------------------------------------------
std::string_view
RegexMatches::operator[](size_t index) const
{
// check if the index is valid
- if (index >= pcre2_get_ovector_count(_match_data)) {
+ if (index >= pcre2_get_ovector_count(_MatchData::get(_match_data))) {
return std::string_view();
}
- PCRE2_SIZE *ovector = pcre2_get_ovector_pointer(_match_data);
+ PCRE2_SIZE *ovector = pcre2_get_ovector_pointer(_MatchData::get(_match_data));
return std::string_view(_subject.data() + ovector[2 * index], ovector[2 * index + 1] - ovector[2 * index]);
}
+//----------------------------------------------------------------------------
+struct Regex::_Code {
+ static pcre2_code *
+ get(_CodePtr const &p)
+ {
+ return static_cast<pcre2_code *>(p._ptr);
+ }
+ static void
+ set(_CodePtr &p, pcre2_code *ptr)
+ {
+ p._ptr = ptr;
+ }
+};
+
//----------------------------------------------------------------------------
Regex::Regex(Regex &&that) noexcept
{
- _code = that._code;
- that._code = nullptr;
+ _code = that._code;
+ _Code::set(that._code, nullptr);
}
//----------------------------------------------------------------------------
Regex::~Regex()
{
- if (_code != nullptr) {
- pcre2_code_free(_code);
+ auto ptr = _Code::get(_code);
+ if (ptr != nullptr) {
+ pcre2_code_free(ptr);
}
}
@@ -258,8 +274,8 @@ bool
Regex::compile(std::string_view pattern, std::string &error, int &erroroffset, uint32_t flags)
{
// free the existing compiled regex if there is one
- if (_code != nullptr) {
- pcre2_code_free(_code);
+ if (auto ptr = _Code::get(_code); ptr != nullptr) {
+ pcre2_code_free(ptr);
}
// get the RegexContext instance - should only be null when shutting down
@@ -270,9 +286,9 @@ Regex::compile(std::string_view pattern, std::string &error, int &erroroffset, u
PCRE2_SIZE error_offset;
int error_code;
- _code = pcre2_compile(reinterpret_cast<PCRE2_SPTR>(pattern.data()), pattern.size(), flags, &error_code, &error_offset,
- regex_context->get_compile_context());
- if (!_code) {
+ auto code = pcre2_compile(reinterpret_cast<PCRE2_SPTR>(pattern.data()), pattern.size(), flags, &error_code, &error_offset,
+ regex_context->get_compile_context());
+ if (!code) {
erroroffset = error_offset;
// get pcre2 error message
@@ -283,7 +299,9 @@ Regex::compile(std::string_view pattern, std::string &error, int &erroroffset, u
}
// support for JIT
- pcre2_jit_compile(_code, PCRE2_JIT_COMPLETE);
+ pcre2_jit_compile(code, PCRE2_JIT_COMPLETE);
+
+ _Code::set(_code, code);
return true;
}
@@ -292,7 +310,7 @@ Regex::compile(std::string_view pattern, std::string &error, int &erroroffset, u
bool
Regex::exec(std::string_view subject) const
{
- if (_code == nullptr) {
+ if (_Code::get(_code) == nullptr) {
return false;
}
RegexMatches matches;
@@ -305,28 +323,23 @@ Regex::exec(std::string_view subject) const
int32_t
Regex::exec(std::string_view subject, RegexMatches &matches) const
{
- // check if there is a compiled regex
- if (_code == nullptr) {
- return 0;
- }
+ auto code = _Code::get(_code);
- // get the RegexContext instance - should only be null when shutting down
- RegexContext *regex_context = RegexContext::get_instance();
- if (regex_context == nullptr) {
+ // check if there is a compiled regex
+ if (code == nullptr) {
return 0;
}
+ int count = pcre2_match(code, reinterpret_cast<PCRE2_SPTR>(subject.data()), subject.size(), 0, 0,
+ RegexMatches::_MatchData::get(matches._match_data), RegexContext::get_instance()->get_match_context());
- int count = pcre2_match(_code, reinterpret_cast<PCRE2_SPTR>(subject.data()), subject.size(), 0, 0, matches.get_match_data(),
- regex_context->get_match_context());
-
- matches.set_size(count);
+ matches._size = count;
if (count < 0) {
return count;
}
if (count > 0) {
- matches.set_subject(subject);
+ matches._subject = subject;
}
return count;
@@ -337,7 +350,7 @@ int32_t
Regex::get_capture_count()
{
int captures = -1;
- if (pcre2_pattern_info(_code, PCRE2_INFO_CAPTURECOUNT, &captures) != 0) {
+ if (pcre2_pattern_info(_Code::get(_code), PCRE2_INFO_CAPTURECOUNT, &captures) != 0) {
return -1;
}
return captures;