You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kudu.apache.org by al...@apache.org on 2019/01/31 00:31:53 UTC
[kudu] branch master updated: [thirdparty] detection of broken
std::regex in libstdc++
This is an automated email from the ASF dual-hosted git repository.
alexey pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git
The following commit(s) were added to refs/heads/master by this push:
new 5884166 [thirdparty] detection of broken std::regex in libstdc++
5884166 is described below
commit 588416694c0543f33d3287f7d8547875a8adf51d
Author: Alexey Serbin <al...@apache.org>
AuthorDate: Tue Jan 29 19:56:33 2019 -0800
[thirdparty] detection of broken std::regex in libstdc++
Added detection of broken std::regex and friends in older
versions of libstdc++ (< 4.9) into the cpplint tool. For details,
see [1]. I tried to find some warning flags for the GNU compiler
and/or linker to warn about non- or partially-implemented features
of GNU libstdc++ that doesn't yield compilation errors as is,
however I could not find anything relevant.
The motivation for this change is the fact that Kudu still compiles
and links against older libstdc++ at RHEL/CentOS 7, Ubuntu 14.04
and SLES 12, but using C++11 std::regex functionality could lead
to unexpected crash if the binary is linked aginst libstdc++ of
versions prior 4.9. Yes, there are tests, but:
* The issue with std::regex in older libstdc++ affects only the
platforms mentioned above.
* It's much better to detect and warn about the already known issue
sooner to avoid wasting time on troubleshooting and debugging.
References:
[1] https://gcc.gnu.org/onlinedocs/gcc-4.8.2/libstdc++/manual/manual/status.html#status.iso.2011
section 28 'Regular Expressions'
Change-Id: Ibb7d01c536a3e2c059c9c9f6a8d14dcb6d6acad1
Reviewed-on: http://gerrit.cloudera.org:8080/12302
Reviewed-by: Adar Dembo <ad...@cloudera.com>
Tested-by: Kudu Jenkins
---
build-support/lint.sh | 5 +-
thirdparty/download-thirdparty.sh | 2 +-
thirdparty/patches/google-styleguide-cpplint.patch | 125 ++++++++++++++++++++-
3 files changed, 125 insertions(+), 7 deletions(-)
diff --git a/build-support/lint.sh b/build-support/lint.sh
index 6cc2f74..f93168d 100755
--- a/build-support/lint.sh
+++ b/build-support/lint.sh
@@ -47,11 +47,12 @@ else
FILES=$(find $ROOT/src -name '*.cc' -or -name '*.h' | grep -v "\.pb\.\|\.service\.\|\.proxy\.\|\.krpc\.\|gutil\|trace_event\|kudu_export\.h\|x509_check_host")
fi
-cd $ROOT
+cpplint_filter="+runtime/broken_libstdcpp_regex,-whitespace/comments,-readability/todo,-readability/inheritance,-build/header_guard,-build/include_order,-legal/copyright,-build/c++11,-readability/nolint"
+cd $ROOT
$ROOT/thirdparty/installed/common/bin/cpplint.py \
--verbose=4 \
- --filter=-whitespace/comments,-readability/todo,-readability/inheritance,-build/header_guard,-build/include_order,-legal/copyright,-build/c++11,-readability/nolint \
+ --filter=$cpplint_filter \
$FILES 2>&1 | grep -v 'Done processing' | tee $TMP
NUM_ERRORS=$(grep "Total errors found" $TMP | awk '{print $4}')
diff --git a/thirdparty/download-thirdparty.sh b/thirdparty/download-thirdparty.sh
index 9b19305..95a3aa1 100755
--- a/thirdparty/download-thirdparty.sh
+++ b/thirdparty/download-thirdparty.sh
@@ -276,7 +276,7 @@ fetch_and_patch \
$MUSTACHE_SOURCE \
$MUSTACHE_PATCHLEVEL
-GSG_PATCHLEVEL=1
+GSG_PATCHLEVEL=2
fetch_and_patch \
google-styleguide-${GSG_VERSION}.tar.gz \
$GSG_SOURCE \
diff --git a/thirdparty/patches/google-styleguide-cpplint.patch b/thirdparty/patches/google-styleguide-cpplint.patch
index b8212cf..ab92de3 100644
--- a/thirdparty/patches/google-styleguide-cpplint.patch
+++ b/thirdparty/patches/google-styleguide-cpplint.patch
@@ -1,6 +1,115 @@
---- a/cpplint/cpplint.py 2017-09-19 22:24:22.000000000 -0700
-+++ b/cpplint/cpplint.py 2017-09-19 22:24:52.000000000 -0700
-@@ -5488,7 +5488,7 @@
+--- a/cpplint/cpplint.py 2019-01-29 19:51:49.000000000 -0800
++++ b/cpplint/cpplint.py 2019-01-30 15:19:26.000000000 -0800
+@@ -207,6 +207,7 @@
+ 'readability/todo',
+ 'readability/utf8',
+ 'runtime/arrays',
++ 'runtime/broken_libstdcpp_regex',
+ 'runtime/casting',
+ 'runtime/explicit',
+ 'runtime/int',
+@@ -251,7 +252,7 @@
+ # flag. By default all errors are on, so only add here categories that should be
+ # off by default (i.e., categories that must be enabled by the --filter= flags).
+ # All entries here should start with a '-' or '+', as in the --filter= flag.
+-_DEFAULT_FILTERS = ['-build/include_alpha']
++_DEFAULT_FILTERS = ['-build/include_alpha','-runtime/broken_libstdcpp_regex']
+
+ # We used to check for high-bit characters, but after much discussion we
+ # decided those were OK, as long as they were in UTF-8 and didn't represent
+@@ -1934,6 +1935,91 @@
+ '...) for improved thread safety.')
+
+
++# (broken-std::regex-stuff, libc-alternative, validation pattern)
++#
++# See the inline documentation for the CheckBrokenLibStdCppRegex() function
++# for details.
++_STDCPP_REGEX_PREFIX = r'(\s|[<({;,?:]\s*)(std::)?'
++_STDCPP_REGEX_LIST = (
++ ('basic_regex', 'regex_t', _STDCPP_REGEX_PREFIX + r'basic_regex\s*<'),
++ ('regex', 'regex_t', _STDCPP_REGEX_PREFIX + r'[w]?regex([&*]|\s+)'),
++ ('match_results', 'regmatch_t',
++ _STDCPP_REGEX_PREFIX + r'match_results\s*<'),
++ ('[cs]match', 'regmatch_t',
++ _STDCPP_REGEX_PREFIX + r'[w]?[cs]match([&*]|\s+)'),
++ ('sub_match', 'regmatch_t', _STDCPP_REGEX_PREFIX + r'sub_match\s*<'),
++ ('[cs]sub_match', 'regmatch_t',
++ _STDCPP_REGEX_PREFIX + r'[w]?[cs]sub_match([&*]|\s+)'),
++ ('regex_iterator', 'regmatch_t',
++ _STDCPP_REGEX_PREFIX + r'regex_iterator([&*]|\s+)'),
++ ('regex_token_iterator', 'regmatch_t',
++ _STDCPP_REGEX_PREFIX + r'regex_token_iterator([&*]|\s+)'),
++ ('regex_search()', 'regexec()',
++ _STDCPP_REGEX_PREFIX + r'regex_search\s*\([^)]+\)'),
++ ('regex_match()', 'regexec()',
++ _STDCPP_REGEX_PREFIX + r'regex_match\s*\([^)]+\)'),
++ )
++
++def CheckBrokenLibStdCppRegex(filename, clean_lines, linenum, error):
++ """Checks for broken std::regex and friends in older libstdc++.
++
++ With older g++ and libstdc++ (version < 4.9) it's possible to successfully
++ build (i.e. compile and link) a binary from C++ code using std::regex and
++ friends. However, the code will throw unexpected std::regex_error exception
++ while compiling the regex even if the regex is valid. The same code works
++ perfectly fine if built with newer g++/libstdc++ (version >= 4.9) or
++ with clang/libc++. See the snippet below for an example.
++
++-----------------------------------------------------------------------------
++$ cat regex-test.cc
++#include <regex>
++#include <string>
++
++bool fun(const std::string& version_str) {
++ static const std::regex kVersionRegex(
++ "^[vV]([[:digit:]]+\\.[[:digit:]]+\\.[[:digit:]]+)");
++
++ std::smatch match;
++ if (!std::regex_search(version_str, match, kVersionRegex)) {
++ return false;
++ }
++ if (match.size() != 2) {
++ return false;
++ }
++ return true;
++}
++
++int main() {
++ return fun("v1.2.3") ? 0 : -1;
++}
++$ c++ -std=c++11 regex-test.cc -o regex-test
++$ ./regex-test
++$ echo $?
++-----------------------------------------------------------------------------
++
++ As it turns out, that's documented: see section 28 'Regular Expressions' at
++ https://gcc.gnu.org/onlinedocs/gcc-4.8.2/libstdc++/manual/manual/status.html#status.iso.2011
++
++ Even if documented, that behavior is completely bogus and unexpected.
++ Projects that use C++11 features and need to be compiled at systems with
++ g++/libstdc++ of versions prior 4.9 should not use broken std::regex and
++ friends since their run-time behavior is unpredictable.
++
++ Args:
++ filename: The name of the current file.
++ clean_lines: A CleansedLines instance containing the file.
++ linenum: The number of the line to check.
++ error: The function to call with any errors found.
++ """
++ line = clean_lines.elided[linenum]
++ for regex_entity, libc_alternative, pattern in _STDCPP_REGEX_LIST:
++ if Search(pattern, line):
++ error(filename, linenum, 'runtime/broken_libstdcpp_regex', 4,
++ 'Consider using libc alternative \'' + libc_alternative +
++ '\' instead of broken \'' + regex_entity +
++ '\' in libstdc++ version < 4.9')
++
++
+ def CheckVlogArguments(filename, clean_lines, linenum, error):
+ """Checks that VLOG() is only used for defining a logging level.
+
+@@ -5488,7 +5574,7 @@
('<stack>', ('stack',)),
('<string>', ('char_traits', 'basic_string',)),
('<tuple>', ('tuple',)),
@@ -9,7 +118,7 @@
('<vector>', ('vector',)),
# gcc extensions.
-@@ -5501,8 +5501,7 @@
+@@ -5501,8 +5587,7 @@
_RE_PATTERN_STRING = re.compile(r'\bstring\b')
_re_pattern_algorithm_header = []
@@ -19,3 +128,11 @@
# Match max<type>(..., ...), max(..., ...), but not foo->max, foo.max or
# type::max().
_re_pattern_algorithm_header.append(
+@@ -5940,6 +6025,7 @@
+ nesting_state, error)
+ CheckVlogArguments(filename, clean_lines, line, error)
+ CheckPosixThreading(filename, clean_lines, line, error)
++ CheckBrokenLibStdCppRegex(filename, clean_lines, line, error)
+ CheckInvalidIncrement(filename, clean_lines, line, error)
+ CheckMakePairUsesDeduction(filename, clean_lines, line, error)
+ CheckDefaultLambdaCaptures(filename, clean_lines, line, error)