You are viewing a plain text version of this content. The canonical link for it is here.
Posted to cvs@httpd.apache.org by pg...@apache.org on 2007/11/26 17:50:09 UTC
svn commit: r598339 [31/37] - in /httpd/httpd/vendor/pcre/current: ./ doc/
doc/html/ testdata/
Added: httpd/httpd/vendor/pcre/current/pcrecpp_unittest.cc
URL: http://svn.apache.org/viewvc/httpd/httpd/vendor/pcre/current/pcrecpp_unittest.cc?rev=598339&view=auto
==============================================================================
--- httpd/httpd/vendor/pcre/current/pcrecpp_unittest.cc (added)
+++ httpd/httpd/vendor/pcre/current/pcrecpp_unittest.cc Mon Nov 26 08:49:53 2007
@@ -0,0 +1,1240 @@
+// -*- coding: utf-8 -*-
+//
+// Copyright (c) 2005 - 2006, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Author: Sanjay Ghemawat
+//
+// TODO: Test extractions for PartialMatch/Consume
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <stdio.h>
+#include <cassert>
+#include <vector>
+#include "pcrecpp.h"
+
+using pcrecpp::StringPiece;
+using pcrecpp::RE;
+using pcrecpp::RE_Options;
+using pcrecpp::Hex;
+using pcrecpp::Octal;
+using pcrecpp::CRadix;
+
+static bool VERBOSE_TEST = false;
+
+// CHECK(condition) evaluates `condition` exactly once. If it is false, the
+// macro prints "<file>:<line>: Check failed: <condition text>" to stderr and
+// terminates the process with exit status 1. It is *not* controlled by
+// NDEBUG, so the check is executed regardless of compilation mode.
+// Therefore, it is safe to put expressions with side effects inside it:
+// CHECK_EQ(fp->Write(x), 4)
+// The do { ... } while (0) wrapper makes the expansion a single statement,
+// so CHECK can be used as the unbraced body of an if/else.
+#define CHECK(condition) do { \
+ if (!(condition)) { \
+ fprintf(stderr, "%s:%d: Check failed: %s\n", \
+ __FILE__, __LINE__, #condition); \
+ exit(1); \
+ } \
+} while (0)
+
+// CHECK_EQ(a, b) dies via CHECK unless a == b. Both arguments are
+// parenthesized so that an argument containing an operator of lower
+// precedence than == (for example CHECK_EQ(x & 1, 0)) is still compared as
+// a whole expression.
+#define CHECK_EQ(a, b) CHECK((a) == (b))
+
+// Timing benchmark: compiles "ruby:\d+" once and runs FullMatch (no
+// captures) against the same input num_iters times; does nothing when
+// num_iters <= 0. Dies via CHECK if any match fails.
+static void Timing1(int num_iters) {
+  RE pattern("ruby:\\d+");
+  StringPiece p("ruby:1234");
+  int remaining = num_iters;
+  while (remaining > 0) {
+    CHECK(pattern.FullMatch(p));
+    --remaining;
+  }
+}
+
+// Timing benchmark: compiles "ruby:(\d+)" once and, num_iters times, runs
+// FullMatch with the capture group converted into an int, checking that the
+// extracted value is 1234 each time.
+static void Timing2(int num_iters) {
+  RE pattern("ruby:(\\d+)");
+  int i;
+  int remaining = num_iters;
+  while (remaining > 0) {
+    CHECK(pattern.FullMatch("ruby:1234", &i));
+    CHECK_EQ(i, 1234);
+    --remaining;
+  }
+}
+
+// Timing benchmark: builds a text made of num_iters identical lines, then
+// repeatedly consumes one line (pattern ".*\n") from the front of the text
+// until Consume() fails, and prints how many lines were consumed.
+static void Timing3(int num_iters) {
+  string text_string;
+  for (int j = num_iters; j > 0; j--) {
+    text_string += "this is another line\n";
+  }
+
+  RE line_matcher(".*\n");
+  // `text` is a view onto text_string; Consume() advances the view.
+  StringPiece text(text_string);
+  int counter = 0;
+  while (line_matcher.Consume(&text)) {
+    counter++;
+  }
+  printf("Matched %d lines\n", counter);
+}
+
+#if 0  // change to "#if 1" if you have a way of defining VirtualProcessSize()
+
+// Leak check: constructs and destroys 100000 RE objects with distinct
+// patterns, sampling the process size at the half-way point and at the end.
+// Dies via CHECK if the size grew by 2% or more over the second half.
+static void LeakTest() {
+  // Check for memory leaks
+  unsigned long long initial_size = 0;
+  for (int i = 0; i < 100000; i++) {
+    if (i == 50000) {
+      initial_size = VirtualProcessSize();
+      printf("Size after 50000: %llu\n", initial_size);
+    }
+    char buf[100];  // definitely big enough
+    sprintf(buf, "pat%09d", i);
+    RE newre(buf);
+  }
+  // Same type as initial_size, and the type the %llu conversion expects.
+  unsigned long long final_size = VirtualProcessSize();
+  printf("Size after 100000: %llu\n", final_size);
+  // Subtract in floating point: an unsigned subtraction would wrap around
+  // to a huge value if the process happened to shrink.
+  const double growth =
+      (double(final_size) - double(initial_size)) / double(final_size);
+  printf("Growth: %0.2f%%\n", growth * 100);
+  CHECK(growth < 0.02);  // Allow < 2% growth
+}
+
+#endif
+
+// Exercises the Hex(), Octal() and CRadix() argument wrappers, plus plain
+// decimal parsing, for each integer type. Every CHECK_* helper macro below
+// stringifies a literal, parses that text back through RE::FullMatch, and
+// compares the parsed value with the literal itself.
+static void RadixTests() {
+ printf("Testing hex\n");
+
+ // CHECK_HEX(type, value): `value` is a hex literal written without its 0x
+ // prefix, optionally carrying a U/L suffix. The suffix is matched by
+ // [uUlL]* outside the capture group, so only the digits are converted.
+ // The second check feeds "0x" followed by the same text through CRadix()
+ // and expects the same value.
+#define CHECK_HEX(type, value) \
+ do { \
+ type v; \
+ CHECK(RE("([0-9a-fA-F]+)[uUlL]*").FullMatch(#value, Hex(&v))); \
+ CHECK_EQ(v, 0x ## value); \
+ CHECK(RE("([0-9a-fA-FxX]+)[uUlL]*").FullMatch("0x" #value, CRadix(&v))); \
+ CHECK_EQ(v, 0x ## value); \
+ } while(0)
+
+ CHECK_HEX(short, 2bad);
+ CHECK_HEX(unsigned short, 2badU);
+ CHECK_HEX(int, dead);
+ CHECK_HEX(unsigned int, deadU);
+ CHECK_HEX(long, 7eadbeefL);
+ CHECK_HEX(unsigned long, deadbeefUL);
+#ifdef HAVE_LONG_LONG
+ CHECK_HEX(long long, 12345678deadbeefLL);
+#endif
+#ifdef HAVE_UNSIGNED_LONG_LONG
+ CHECK_HEX(unsigned long long, cafebabedeadbeefULL);
+#endif
+
+#undef CHECK_HEX
+
+ printf("Testing octal\n");
+
+ // CHECK_OCTAL(type, value): same scheme as above for octal. The expected
+ // value is the literal with a leading 0 pasted on; the CRadix() check
+ // parses "0" followed by the same text.
+#define CHECK_OCTAL(type, value) \
+ do { \
+ type v; \
+ CHECK(RE("([0-7]+)[uUlL]*").FullMatch(#value, Octal(&v))); \
+ CHECK_EQ(v, 0 ## value); \
+ CHECK(RE("([0-9a-fA-FxX]+)[uUlL]*").FullMatch("0" #value, CRadix(&v))); \
+ CHECK_EQ(v, 0 ## value); \
+ } while(0)
+
+ CHECK_OCTAL(short, 77777);
+ CHECK_OCTAL(unsigned short, 177777U);
+ CHECK_OCTAL(int, 17777777777);
+ CHECK_OCTAL(unsigned int, 37777777777U);
+ CHECK_OCTAL(long, 17777777777L);
+ CHECK_OCTAL(unsigned long, 37777777777UL);
+#ifdef HAVE_LONG_LONG
+ CHECK_OCTAL(long long, 777777777777777777777LL);
+#endif
+#ifdef HAVE_UNSIGNED_LONG_LONG
+ CHECK_OCTAL(unsigned long long, 1777777777777777777777ULL);
+#endif
+
+#undef CHECK_OCTAL
+
+ printf("Testing decimal\n");
+
+ // CHECK_DECIMAL(type, value): the text is parsed once through a plain
+ // pointer argument and once through CRadix() with no prefix; both must
+ // yield the literal's value. An optional leading '-' is part of the
+ // capture group.
+#define CHECK_DECIMAL(type, value) \
+ do { \
+ type v; \
+ CHECK(RE("(-?[0-9]+)[uUlL]*").FullMatch(#value, &v)); \
+ CHECK_EQ(v, value); \
+ CHECK(RE("(-?[0-9a-fA-FxX]+)[uUlL]*").FullMatch(#value, CRadix(&v))); \
+ CHECK_EQ(v, value); \
+ } while(0)
+
+ CHECK_DECIMAL(short, -1);
+ CHECK_DECIMAL(unsigned short, 9999);
+ CHECK_DECIMAL(int, -1000);
+ CHECK_DECIMAL(unsigned int, 12345U);
+ CHECK_DECIMAL(long, -10000000L);
+ CHECK_DECIMAL(unsigned long, 3083324652U);
+#ifdef HAVE_LONG_LONG
+ CHECK_DECIMAL(long long, -100000000000000LL);
+#endif
+#ifdef HAVE_UNSIGNED_LONG_LONG
+ CHECK_DECIMAL(unsigned long long, 1234567890987654321ULL);
+#endif
+
+#undef CHECK_DECIMAL
+
+}
+
+// Table-driven test of RE::Replace (checked against `single`) and
+// RE::GlobalReplace (checked against `global`). Table patterns are compiled
+// with PCRE_NEWLINE_CRLF, in UTF-8 mode when SUPPORT_UTF8 is defined. Two
+// extra cases at the end run a CRLF input under PCRE_NEWLINE_CR and
+// PCRE_NEWLINE_LF.
+static void TestReplace() {
+  printf("Testing Replace\n");
+
+  struct ReplaceTest {
+    const char *regexp;    // pattern to compile
+    const char *rewrite;   // replacement text passed to Replace/GlobalReplace
+    const char *original;  // input string; NULL marks the end of the table
+    const char *single;    // expected result of Replace()
+    const char *global;    // expected result of GlobalReplace()
+  };
+  static const ReplaceTest tests[] = {
+    { "(qu|[b-df-hj-np-tv-z]*)([a-z]+)",
+      "\\2\\1ay",
+      "the quick brown fox jumps over the lazy dogs.",
+      "ethay quick brown fox jumps over the lazy dogs.",
+      "ethay ickquay ownbray oxfay umpsjay overay ethay azylay ogsday." },
+    { "\\w+",
+      "\\0-NOSPAM",
+      "paul.haahr@google.com",
+      "paul-NOSPAM.haahr@google.com",
+      "paul-NOSPAM.haahr-NOSPAM@google-NOSPAM.com-NOSPAM" },
+    { "^",
+      "(START)",
+      "foo",
+      "(START)foo",
+      "(START)foo" },
+    { "^",
+      "(START)",
+      "",
+      "(START)",
+      "(START)" },
+    { "$",
+      "(END)",
+      "",
+      "(END)",
+      "(END)" },
+    { "b",
+      "bb",
+      "ababababab",
+      "abbabababab",
+      "abbabbabbabbabb" },
+    { "b",
+      "bb",
+      "bbbbbb",
+      "bbbbbbb",
+      "bbbbbbbbbbbb" },
+    { "b+",
+      "bb",
+      "bbbbbb",
+      "bb",
+      "bb" },
+    { "b*",
+      "bb",
+      "bbbbbb",
+      "bb",
+      "bb" },
+    { "b*",
+      "bb",
+      "aaaaa",
+      "bbaaaaa",
+      "bbabbabbabbabbabb" },
+    // In CRLF mode the expected output never has a replacement inserted
+    // between '\r' and '\n' of a "\r\n" pair, while a lone '\r' or '\n' is
+    // treated like any other character.
+    { "b*",
+      "bb",
+      "aa\naa\n",
+      "bbaa\naa\n",
+      "bbabbabb\nbbabbabb\nbb" },
+    { "b*",
+      "bb",
+      "aa\raa\r",
+      "bbaa\raa\r",
+      "bbabbabb\rbbabbabb\rbb" },
+    { "b*",
+      "bb",
+      "aa\r\naa\r\n",
+      "bbaa\r\naa\r\n",
+      "bbabbabb\r\nbbabbabb\r\nbb" },
+#ifdef SUPPORT_UTF8
+    // The expected strings are split into adjacent literals wherever a hex
+    // escape is followed by 'b', so that the 'b' is not parsed as part of
+    // the escape.
+    { "b*",
+      "bb",
+      "\xE3\x83\x9B\xE3\x83\xBC\xE3\x83\xA0\xE3\x81\xB8",  // utf8
+      "bb\xE3\x83\x9B\xE3\x83\xBC\xE3\x83\xA0\xE3\x81\xB8",
+      "bb\xE3\x83\x9B""bb""\xE3\x83\xBC""bb""\xE3\x83\xA0""bb""\xE3\x81\xB8""bb" },
+    { "b*",
+      "bb",
+      "\xE3\x83\x9B\r\n\xE3\x83\xBC\r\xE3\x83\xA0\n\xE3\x81\xB8\r\n",  // utf8
+      "bb\xE3\x83\x9B\r\n\xE3\x83\xBC\r\xE3\x83\xA0\n\xE3\x81\xB8\r\n",
+      ("bb\xE3\x83\x9B""bb\r\nbb""\xE3\x83\xBC""bb\rbb""\xE3\x83\xA0"
+       "bb\nbb""\xE3\x81\xB8""bb\r\nbb") },
+#endif
+    { "", NULL, NULL, NULL, NULL }
+  };
+
+#ifdef SUPPORT_UTF8
+  const bool support_utf8 = true;
+#else
+  const bool support_utf8 = false;
+#endif
+
+  for (const ReplaceTest *t = tests; t->original != NULL; ++t) {
+    RE re(t->regexp, RE_Options(PCRE_NEWLINE_CRLF).set_utf8(support_utf8));
+    // CHECK rather than assert: assert() is compiled out under NDEBUG, and a
+    // pattern that failed to compile should be reported as such.
+    CHECK(re.error().empty());
+    string one(t->original);
+    CHECK(re.Replace(t->rewrite, &one));
+    CHECK_EQ(one, t->single);
+    string all(t->original);
+    CHECK(re.GlobalReplace(t->rewrite, &all) > 0);
+    CHECK_EQ(all, t->global);
+  }
+
+  // One final test: test \r\n replacement when we're not in CRLF mode.
+  // Here the expected output does have a replacement between '\r' and '\n'.
+  {
+    RE re("b*", RE_Options(PCRE_NEWLINE_CR).set_utf8(support_utf8));
+    CHECK(re.error().empty());
+    string all("aa\r\naa\r\n");
+    CHECK(re.GlobalReplace("bb", &all) > 0);
+    CHECK_EQ(all, string("bbabbabb\rbb\nbbabbabb\rbb\nbb"));
+  }
+  {
+    RE re("b*", RE_Options(PCRE_NEWLINE_LF).set_utf8(support_utf8));
+    CHECK(re.error().empty());
+    string all("aa\r\naa\r\n");
+    CHECK(re.GlobalReplace("bb", &all) > 0);
+    CHECK_EQ(all, string("bbabbabb\rbb\nbbabbabb\rbb\nbb"));
+  }
+  // TODO: test what happens when no PCRE_NEWLINE_* flag is set.
+  // Alas, the answer depends on how pcre was compiled.
+}
+
+// Checks RE::Extract(): on a match, the rewrite string is stored in the
+// output with \1, \2 replaced by the corresponding groups and \0 by the
+// whole match.
+static void TestExtract() {
+ printf("Testing Extract\n");
+
+ string s;
+
+ // Groups may be referenced in any order in the rewrite string.
+ CHECK(RE("(.*)@([^.]*)").Extract("\\2!\\1", "boris@kremvax.ru", &s));
+ CHECK_EQ(s, "kremvax!boris");
+
+ // check the RE interface as well
+ CHECK(RE(".*").Extract("'\\0'", "foo", &s));
+ CHECK_EQ(s, "'foo'");
+ // A failed Extract must return false and leave the previous output intact.
+ CHECK(!RE("bar").Extract("'\\0'", "baz", &s));
+ CHECK_EQ(s, "'foo'");
+}
+
+// Checks RE::Consume(): each successful call extracts the next word and
+// advances `input` past it; the call fails once the remaining input no
+// longer begins with a match.
+static void TestConsume() {
+ printf("Testing Consume\n");
+
+ string word;
+
+ string s(" aaa b!@#$@#$cccc");
+ StringPiece input(s);
+
+ RE r("\\s*(\\w+)"); // matches a word, possibly preceded by whitespace
+ CHECK(r.Consume(&input, &word));
+ CHECK_EQ(word, "aaa");
+ CHECK(r.Consume(&input, &word));
+ CHECK_EQ(word, "b");
+ // The remaining input starts with punctuation, so this is expected to fail
+ // even though another word ("cccc") appears further on.
+ CHECK(! r.Consume(&input, &word));
+}
+
+// Checks RE::FindAndConsume(): each successful call extracts the next word
+// found anywhere in the remaining input (skipping non-matching characters)
+// and advances `input`; it fails once no word is left.
+static void TestFindAndConsume() {
+ printf("Testing FindAndConsume\n");
+
+ string word;
+
+ string s(" aaa b!@#$@#$cccc");
+ StringPiece input(s);
+
+ RE r("(\\w+)"); // matches a word
+ CHECK(r.FindAndConsume(&input, &word));
+ CHECK_EQ(word, "aaa");
+ CHECK(r.FindAndConsume(&input, &word));
+ CHECK_EQ(word, "b");
+ // The punctuation run before "cccc" is skipped over.
+ CHECK(r.FindAndConsume(&input, &word));
+ CHECK_EQ(word, "cccc");
+ CHECK(! r.FindAndConsume(&input, &word));
+}
+
+// Checks how capture groups in alternation branches that did not take part
+// in the match are reported: the corresponding output string is expected
+// to be empty, whatever it held before the call.
+static void TestMatchNumberPeculiarity() {
+ printf("Testing match-number peculiaraity\n");
+
+ string word1;
+ string word2;
+ string word3;
+
+ // Exactly one of the three groups participates in each match below.
+ RE r("(foo)|(bar)|(baz)");
+ CHECK(r.PartialMatch("foo", &word1, &word2, &word3));
+ CHECK_EQ(word1, "foo");
+ CHECK_EQ(word2, "");
+ CHECK_EQ(word3, "");
+ CHECK(r.PartialMatch("bar", &word1, &word2, &word3));
+ CHECK_EQ(word1, "");
+ CHECK_EQ(word2, "bar");
+ CHECK_EQ(word3, "");
+ CHECK(r.PartialMatch("baz", &word1, &word2, &word3));
+ CHECK_EQ(word1, "");
+ CHECK_EQ(word2, "");
+ CHECK_EQ(word3, "baz");
+ CHECK(!r.PartialMatch("f", &word1, &word2, &word3));
+
+ // The match succeeds through the branch without a group, so the single
+ // output argument is expected to end up empty.
+ string a;
+ CHECK(RE("(foo)|hello").FullMatch("hello", &a));
+ CHECK_EQ(a, "");
+}
+
+// Checks the match_limit and match_limit_recursion options: with a limit
+// that is high enough the backtracking-heavy pattern (\w+)*b matches
+// text_good, and with a limit that is too low the same match is reported as
+// a failure.
+static void TestRecursion() {
+ printf("Testing recursion\n");
+
+ // Get one string that passes (sometimes), one that never does.
+ string text_good("abcdefghijk");
+ string text_bad("acdefghijkl");
+
+ // According to pcretest, matching text_good against (\w+)*b
+ // requires match_limit of at least 8192, and match_recursion_limit
+ // of at least 37.
+
+ // match_limit just high enough: the partial match on text_good succeeds.
+ RE_Options options_ml;
+ options_ml.set_match_limit(8192);
+ RE re("(\\w+)*b", options_ml);
+ CHECK(re.PartialMatch(text_good) == true);
+ CHECK(re.PartialMatch(text_bad) == false);
+ CHECK(re.FullMatch(text_good) == false);
+ CHECK(re.FullMatch(text_bad) == false);
+
+ // match_limit too low: nothing matches any more.
+ options_ml.set_match_limit(1024);
+ RE re2("(\\w+)*b", options_ml);
+ CHECK(re2.PartialMatch(text_good) == false); // because of match_limit
+ CHECK(re2.PartialMatch(text_bad) == false);
+ CHECK(re2.FullMatch(text_good) == false);
+ CHECK(re2.FullMatch(text_bad) == false);
+
+ // Recursion limit above the required 37: same results as the first group.
+ RE_Options options_mlr;
+ options_mlr.set_match_limit_recursion(50);
+ RE re3("(\\w+)*b", options_mlr);
+ CHECK(re3.PartialMatch(text_good) == true);
+ CHECK(re3.PartialMatch(text_bad) == false);
+ CHECK(re3.FullMatch(text_good) == false);
+ CHECK(re3.FullMatch(text_bad) == false);
+
+ // Recursion limit below the required 37: nothing matches any more.
+ options_mlr.set_match_limit_recursion(10);
+ RE re4("(\\w+)*b", options_mlr);
+ CHECK(re4.PartialMatch(text_good) == false);
+ CHECK(re4.PartialMatch(text_bad) == false);
+ CHECK(re4.FullMatch(text_good) == false);
+ CHECK(re4.FullMatch(text_bad) == false);
+}
+
+// Round-trip property of RE::QuoteMeta: the quoted form of `unquoted`,
+// compiled as a pattern with `options`, must fully match `unquoted` itself.
+// Dies via CHECK otherwise.
+static void TestQuoteMeta(string unquoted, RE_Options options = RE_Options()) {
+  const string escaped_pattern(RE::QuoteMeta(unquoted));
+  RE re(escaped_pattern, options);
+  CHECK(re.FullMatch(unquoted));
+}
+
+// Counterpart of the round-trip check: once `unquoted` has been meta-quoted
+// its regexp characters are literal, so the resulting pattern (compiled with
+// `options`) must NOT fully match `should_not_match`. Dies via CHECK if it
+// does.
+static void NegativeTestQuoteMeta(string unquoted, string should_not_match,
+                                  RE_Options options = RE_Options()) {
+  const string escaped_pattern(RE::QuoteMeta(unquoted));
+  RE re(escaped_pattern, options);
+  CHECK(!re.FullMatch(should_not_match));
+}
+
+// Positive QuoteMeta cases: each literal below, once meta-quoted, must
+// fully match itself. The literals are run through TestQuoteMeta in order.
+static void TestQuotaMetaSimple() {
+  static const char *const kLiterals[] = {
+    "foo",
+    "foo.bar",
+    "foo\\.bar",
+    "[1-9]",
+    "1.5-2.0?",
+    "\\d",
+    "Who doesn't like ice cream?",
+    "((a|b)c?d*e+[f-h]i)",
+    "((?!)xxx).*yyy",
+    "(["
+  };
+  const int literal_count = sizeof(kLiterals) / sizeof(kLiterals[0]);
+  for (int idx = 0; idx < literal_count; ++idx) {
+    TestQuoteMeta(kLiterals[idx]);
+  }
+}
+
+// Negative QuoteMeta cases: for each pair, the meta-quoted first string
+// must not fully match the second string. The pairs are run through
+// NegativeTestQuoteMeta in order.
+static void TestQuoteMetaSimpleNegative() {
+  struct NegativeCase {
+    const char *unquoted;          // text that gets meta-quoted
+    const char *should_not_match;  // subject the quoted pattern must reject
+  };
+  static const NegativeCase kCases[] = {
+    { "foo", "bar" },
+    { "...", "bar" },
+    { "\\.", "." },
+    { "\\.", ".." },
+    { "(a)", "a" },
+    { "(a|b)", "a" },
+    { "(a|b)", "(a)" },
+    { "(a|b)", "a|b" },
+    { "[0-9]", "0" },
+    { "[0-9]", "0-9" },
+    { "[0-9]", "[9]" },
+    { "((?!)xxx)", "xxx" }
+  };
+  const int case_count = sizeof(kCases) / sizeof(kCases[0]);
+  for (int idx = 0; idx < case_count; ++idx) {
+    NegativeTestQuoteMeta(kCases[idx].unquoted, kCases[idx].should_not_match);
+  }
+}
+
+// QuoteMeta round trip for a string containing a non-ASCII byte (0xb2),
+// using the default options.
+static void TestQuoteMetaLatin1() {
+  const char *const latin1_text = "3\xb2 = 9";
+  TestQuoteMeta(latin1_text);
+}
+
+// QuoteMeta round trips for strings containing multi-byte UTF-8 sequences,
+// compiled with the UTF8() option. Does nothing unless SUPPORT_UTF8 is
+// defined.
+static void TestQuoteMetaUtf8() {
+#ifdef SUPPORT_UTF8
+ // The 'c' is written as \x63 so that it is not parsed as a further hex
+ // digit of the preceding \xa1 escape.
+ TestQuoteMeta("Pl\xc3\xa1\x63ido Domingo", pcrecpp::UTF8());
+ TestQuoteMeta("xyz", pcrecpp::UTF8()); // No fancy utf8
+ TestQuoteMeta("\xc2\xb0", pcrecpp::UTF8()); // 2-byte utf8 (degree symbol)
+ TestQuoteMeta("27\xc2\xb0 degrees", pcrecpp::UTF8()); // As a middle character
+ TestQuoteMeta("\xe2\x80\xb3", pcrecpp::UTF8()); // 3-byte utf8 (double prime)
+ TestQuoteMeta("\xf0\x9d\x85\x9f", pcrecpp::UTF8()); // 4-byte utf8 (music note)
+ TestQuoteMeta("27\xc2\xb0"); // Interpreted as Latin-1, but should still work
+ // The quoted pattern must not match the subject that has a literal
+ // backslash in front of each byte of the 2-byte sequence.
+ NegativeTestQuoteMeta("27\xc2\xb0", // 2-byte utf (degree symbol)
+ "27\\\xc2\\\xb0",
+ pcrecpp::UTF8());
+#endif
+}
+
+// Entry point for the QuoteMeta tests: prints the section banner, then runs
+// every QuoteMeta test group in order.
+static void TestQuoteMetaAll() {
+  typedef void (*TestGroup)();
+  static const TestGroup kGroups[] = {
+    TestQuotaMetaSimple,
+    TestQuoteMetaSimpleNegative,
+    TestQuoteMetaLatin1,
+    TestQuoteMetaUtf8
+  };
+
+  printf("Testing QuoteMeta\n");
+  const int group_count = sizeof(kGroups) / sizeof(kGroups[0]);
+  for (int idx = 0; idx < group_count; ++idx) {
+    kGroups[idx]();
+  }
+}
+
+//
+// Options tests contributed by
+// Giuseppe Maxia, CTO, Stardata s.r.l.
+// July 2005
+//
+// Compiles `regex` with `options`, runs FullMatch (when `full` is true) or
+// PartialMatch against `str` with one string capture, and dies via CHECK
+// unless the captured text equals `expected`. The boolean match result is
+// deliberately not checked; the capture starts out as the empty string.
+// `option_name` is only used for the progress message.
+static void GetOneOptionResult(
+    const char *option_name,
+    const char *regex,
+    const char *str,
+    RE_Options options,
+    bool full,
+    string expected) {
+  printf("Testing Option <%s>\n", option_name);
+  if (VERBOSE_TEST) {
+    printf("/%s/ finds \"%s\" within \"%s\" \n", regex, expected.c_str(), str);
+  }
+
+  string captured("");
+  RE matcher(regex, options);
+  if (!full) {
+    matcher.PartialMatch(str, &captured);
+  } else {
+    matcher.FullMatch(str, &captured);
+  }
+  CHECK_EQ(captured, expected);
+}
+
+// Compiles `regex` with `options` and checks the outcome of FullMatch (when
+// `full` is true) or PartialMatch against `str`: the match must succeed
+// when `assertive` is true and must fail when it is false. Dies via CHECK
+// on a mismatch. `option_name` is only used for the progress message.
+static void TestOneOption(
+    const char *option_name,
+    const char *regex,
+    const char *str,
+    RE_Options options,
+    bool full,
+    bool assertive = true) {
+  printf("Testing Option <%s>\n", option_name);
+  if (VERBOSE_TEST) {
+    const char *verb = assertive ? "matches" : "doesn't match";
+    printf("'%s' %s /%s/ \n", str, verb, regex);
+  }
+
+  // Negative expectation first, then the positive one.
+  if (!assertive) {
+    if (full) {
+      CHECK(!RE(regex,options).FullMatch(str));
+    } else {
+      CHECK(!RE(regex,options).PartialMatch(str));
+    }
+    return;
+  }
+
+  if (full) {
+    CHECK(RE(regex,options).FullMatch(str));
+  } else {
+    CHECK(RE(regex,options).PartialMatch(str));
+  }
+}
+
+// Caseless option: set through the setter on a named object, through a
+// chained setter call, and through the pcrecpp::CASELESS() helper; finally
+// checks that clearing the option makes the match fail again. All cases use
+// PartialMatch.
+static void Test_CASELESS() {
+  const char *const kWordPattern = "HELLO";
+  const char *const kUpperOnlyPattern = "^[A-Z]+$";
+  const bool kPartial = false;  // value for TestOneOption's `full` argument
+
+  RE_Options options;
+  RE_Options options2;
+
+  options.set_caseless(true);
+  TestOneOption("CASELESS (class)", kWordPattern, "hello", options, kPartial);
+  TestOneOption("CASELESS (class2)", kWordPattern, "hello",
+                options2.set_caseless(true), kPartial);
+  TestOneOption("CASELESS (class)", kUpperOnlyPattern, "Hello", options,
+                kPartial);
+
+  TestOneOption("CASELESS (function)", kWordPattern, "hello",
+                pcrecpp::CASELESS(), kPartial);
+  TestOneOption("CASELESS (function)", kUpperOnlyPattern, "Hello",
+                pcrecpp::CASELESS(), kPartial);
+
+  options.set_caseless(false);
+  TestOneOption("no CASELESS", kWordPattern, "hello", options, kPartial,
+                false);
+}
+
+// Multiline option: "^cruel$" must be found on the middle line of a
+// three-line subject when the option is set (by setter, chained setter or
+// pcrecpp::MULTILINE()), and must not be found once it is cleared.
+static void Test_MULTILINE() {
+  const char *const kLinePattern = "^cruel$";
+  const char *str = "HELLO\ncruel\nworld\n";
+
+  RE_Options options;
+  RE_Options options2;
+
+  options.set_multiline(true);
+  TestOneOption("MULTILINE (class)", kLinePattern, str, options, false);
+  TestOneOption("MULTILINE (class2)", kLinePattern, str,
+                options2.set_multiline(true), false);
+  TestOneOption("MULTILINE (function)", kLinePattern, str,
+                pcrecpp::MULTILINE(), false);
+
+  options.set_multiline(false);
+  TestOneOption("no MULTILINE", kLinePattern, str, options, false, false);
+}
+
+// Dotall option: "HELLO.*world" must fully match a subject that contains
+// newlines when the option is set (by setter, chained setter or
+// pcrecpp::DOTALL()), and must not match once it is cleared.
+static void Test_DOTALL() {
+  const char *const kSpanPattern = "HELLO.*world";
+  const char *str = "HELLO\ncruel\nworld";
+
+  RE_Options options;
+  RE_Options options2;
+
+  options.set_dotall(true);
+  TestOneOption("DOTALL (class)", kSpanPattern, str, options, true);
+  TestOneOption("DOTALL (class2)", kSpanPattern, str,
+                options2.set_dotall(true), true);
+  TestOneOption("DOTALL (function)", kSpanPattern, str, pcrecpp::DOTALL(),
+                true);
+
+  options.set_dotall(false);
+  TestOneOption("no DOTALL", kSpanPattern, str, options, true, false);
+}
+
+// Dollar-endonly option: by default "world$" is found in a subject that
+// ends with a newline; with the option set (by setter or chained setter)
+// it must not be found.
+static void Test_DOLLAR_ENDONLY() {
+  const char *const kEndPattern = "world$";
+  const char *str = "HELLO world\n";
+
+  RE_Options options;
+  RE_Options options2;
+
+  // Default options first, before anything is switched on.
+  TestOneOption("no DOLLAR_ENDONLY", kEndPattern, str, options, false);
+
+  options.set_dollar_endonly(true);
+  TestOneOption("DOLLAR_ENDONLY 1", kEndPattern, str, options, false, false);
+  TestOneOption("DOLLAR_ENDONLY 2", kEndPattern, str,
+                options2.set_dollar_endonly(true), false, false);
+}
+
+// Extra option: the pattern "\HELL\O" is expected not to fully match
+// "HELLO" while the option is set (by setter or chained setter), and to
+// match once the option is cleared.
+static void Test_EXTRA() {
+  const char *const kEscapedPattern = "\\HELL\\O";
+  const char *str = "HELLO";
+
+  RE_Options options;
+
+  options.set_extra(true);
+  TestOneOption("EXTRA 1", kEscapedPattern, str, options, true, false);
+  TestOneOption("EXTRA 2", kEscapedPattern, str,
+                RE_Options().set_extra(true), true, false);
+
+  options.set_extra(false);
+  TestOneOption("no EXTRA", kEscapedPattern, str, options, true);
+}
+
+// Extended option, set by setter, chained setter and pcrecpp::EXTENDED().
+// With the option on, the pattern "HELLO world" is expected NOT to match
+// the subject "HELLO world", while a pattern padded with blanks that uses
+// \s+ for the separator is expected to match. With the option off the plain
+// pattern matches again.
+static void Test_EXTENDED() {
+ RE_Options options;
+ RE_Options options2;
+ const char *str = "HELLO world";
+
+ options.set_extended(true);
+ TestOneOption("EXTENDED (class)", "HELLO world", str, options, false, false);
+ TestOneOption("EXTENDED (class2)", "HELLO world", str, options2.set_extended(true), false, false);
+ // Blank-padded pattern: expected to match under the extended option.
+ TestOneOption("EXTENDED (class)",
+ "^ HE L{2} O "
+ "\\s+ "
+ "\\w+ $ ",
+ str,
+ options,
+ false);
+
+ // Same two checks with the option supplied by the helper function.
+ TestOneOption("EXTENDED (function)", "HELLO world", str, pcrecpp::EXTENDED(), false, false);
+ TestOneOption("EXTENDED (function)",
+ "^ HE L{2} O "
+ "\\s+ "
+ "\\w+ $ ",
+ str,
+ pcrecpp::EXTENDED(),
+ false);
+
+ options.set_extended(false);
+ TestOneOption("no EXTENDED", "HELLO world", str, options, false);
+}
+
+// Checks that parentheses capture text with default options, then sets the
+// no_auto_capture option and runs Extract() a second time.
+static void Test_NO_AUTO_CAPTURE() {
+ RE_Options options;
+ const char *str = "HELLO world";
+ string captured;
+
+ printf("Testing Option <no NO_AUTO_CAPTURE>\n");
+ if (VERBOSE_TEST)
+ printf("parentheses capture text\n");
+ RE re("(world|universe)$", options);
+ CHECK(re.Extract("\\1", str , &captured));
+ CHECK_EQ(captured, "world");
+ // NOTE(review): `re` was constructed before this setter is called and is
+ // not rebuilt afterwards. Unless RE keeps a live reference to `options`,
+ // the Extract() below still runs the pattern compiled WITHOUT
+ // no_auto_capture, so this half would not exercise the option -- confirm
+ // against the RE constructor.
+ options.set_no_auto_capture(true);
+ printf("testing Option <NO_AUTO_CAPTURE>\n");
+ if (VERBOSE_TEST)
+ printf("parentheses do not capture text\n");
+ // The return value is ignored here; only the content of `captured` (which
+ // already holds "world" from the call above) is checked.
+ re.Extract("\\1",str, &captured );
+ CHECK_EQ(captured, "world");
+}
+
+// Ungreedy option: with the option set, '.*' is expected to capture the
+// shortest quoted run and '.*?' the longest; with the option cleared the
+// two expectations are swapped. All cases use PartialMatch.
+static void Test_UNGREEDY() {
+ RE_Options options;
+ const char *str = "HELLO, 'this' is the 'world'";
+
+ options.set_ungreedy(true);
+ GetOneOptionResult("UNGREEDY 1", "('.*')", str, options, false, "'this'" );
+ GetOneOptionResult("UNGREEDY 2", "('.*')", str, RE_Options().set_ungreedy(true), false, "'this'" );
+ // Under the option the '?' suffix is expected to give the longest match.
+ GetOneOptionResult("UNGREEDY", "('.*?')", str, options, false, "'this' is the 'world'" );
+
+ options.set_ungreedy(false);
+ GetOneOptionResult("no UNGREEDY", "('.*')", str, options, false, "'this' is the 'world'" );
+ GetOneOptionResult("no UNGREEDY", "('.*?')", str, options, false, "'this'" );
+}
+
+// Checks set_all_options(), which takes a mask of PCRE_* flags, and
+// compares it with passing the same mask to the RE_Options constructor and
+// with chaining the individual setters. All cases use PartialMatch.
+static void Test_all_options() {
+ const char *str = "HELLO\n" "cruel\n" "world";
+ RE_Options options;
+ options.set_all_options(PCRE_CASELESS | PCRE_DOTALL);
+
+ // The pattern differs from the subject in case and has to span newlines.
+ TestOneOption("all_options (CASELESS|DOTALL)", "^hello.*WORLD", str , options, false);
+ // With every flag cleared the same pattern must no longer match.
+ options.set_all_options(0);
+ TestOneOption("all_options (0)", "^hello.*WORLD", str , options, false, false);
+ options.set_all_options(PCRE_MULTILINE | PCRE_EXTENDED);
+
+ // Blank-separated pattern that has to match the middle line of `str`.
+ TestOneOption("all_options (MULTILINE|EXTENDED)", " ^ c r u e l $ ", str, options, false);
+ TestOneOption("all_options (MULTILINE|EXTENDED) with constructor",
+ " ^ c r u e l $ ",
+ str,
+ RE_Options(PCRE_MULTILINE | PCRE_EXTENDED),
+ false);
+
+ TestOneOption("all_options (MULTILINE|EXTENDED) with concatenation",
+ " ^ c r u e l $ ",
+ str,
+ RE_Options()
+ .set_multiline(true)
+ .set_extended(true),
+ false);
+
+ // Clearing the flags again makes the blank-separated pattern fail.
+ options.set_all_options(0);
+ TestOneOption("all_options (0)", "^ c r u e l $", str, options, false, false);
+
+}
+
+// Entry point for the RE_Options tests: prints the section banner, then
+// runs every per-option test in order.
+static void TestOptions() {
+  typedef void (*OptionTest)();
+  static const OptionTest kOptionTests[] = {
+    Test_CASELESS,
+    Test_MULTILINE,
+    Test_DOTALL,
+    Test_DOLLAR_ENDONLY,
+    Test_EXTENDED,
+    Test_NO_AUTO_CAPTURE,
+    Test_UNGREEDY,
+    Test_EXTRA,
+    Test_all_options
+  };
+
+  printf("Testing Options\n");
+  const int test_count = sizeof(kOptionTests) / sizeof(kOptionTests[0]);
+  for (int idx = 0; idx < test_count; ++idx) {
+    kOptionTests[idx]();
+  }
+}
+
+// Checks that RE objects can be copy-constructed and assigned: a copy must
+// behave like the original (including its options), assignment must
+// replace the previous pattern, and self-assignment must leave the object
+// usable.
+static void TestConstructors() {
+ printf("Testing constructors\n");
+
+ RE_Options options;
+ options.set_dotall(true);
+ const char *str = "HELLO\n" "cruel\n" "world";
+
+ // The full match below has to span the newlines in `str`; the options
+ // object passed in has dotall set.
+ RE orig("HELLO.*world", options);
+ CHECK(orig.FullMatch(str));
+
+ // Copy construction.
+ RE copy1(orig);
+ CHECK(copy1.FullMatch(str));
+
+ // Assignment over an object that held a different, non-matching pattern.
+ RE copy2("not a match");
+ CHECK(!copy2.FullMatch(str));
+ copy2 = copy1;
+ CHECK(copy2.FullMatch(str));
+ copy2 = orig;
+ CHECK(copy2.FullMatch(str));
+
+ // Make sure when we assign to ourselves, nothing bad happens
+ orig = orig;
+ copy1 = copy1;
+ copy2 = copy2;
+ CHECK(orig.FullMatch(str));
+ CHECK(copy1.FullMatch(str));
+ CHECK(copy2.FullMatch(str));
+}
+
+int main(int argc, char** argv) {
+ // Treat any flag as --help
+ if (argc > 1 && argv[1][0] == '-') {
+ printf("Usage: %s [timing1|timing2|timing3 num-iters]\n"
+ " If 'timingX ###' is specified, run the given timing test\n"
+ " with the given number of iterations, rather than running\n"
+ " the default corectness test.\n", argv[0]);
+ return 0;
+ }
+
+ if (argc > 1) {
+ if ( argc == 2 || atoi(argv[2]) == 0) {
+ printf("timing mode needs a num-iters argument\n");
+ return 1;
+ }
+ if (!strcmp(argv[1], "timing1"))
+ Timing1(atoi(argv[2]));
+ else if (!strcmp(argv[1], "timing2"))
+ Timing2(atoi(argv[2]));
+ else if (!strcmp(argv[1], "timing3"))
+ Timing3(atoi(argv[2]));
+ else
+ printf("Unknown argument '%s'\n", argv[1]);
+ return 0;
+ }
+
+ printf("Testing FullMatch\n");
+
+ int i;
+ string s;
+
+ /***** FullMatch with no args *****/
+
+ CHECK(RE("h.*o").FullMatch("hello"));
+ CHECK(!RE("h.*o").FullMatch("othello")); // Must be anchored at front
+ CHECK(!RE("h.*o").FullMatch("hello!")); // Must be anchored at end
+ CHECK(RE("a*").FullMatch("aaaa")); // Fullmatch with normal op
+ CHECK(RE("a*?").FullMatch("aaaa")); // Fullmatch with nongreedy op
+ CHECK(RE("a*?\\z").FullMatch("aaaa")); // Two unusual ops
+
+ /***** FullMatch with args *****/
+
+ // Zero-arg
+ CHECK(RE("\\d+").FullMatch("1001"));
+
+ // Single-arg
+ CHECK(RE("(\\d+)").FullMatch("1001", &i));
+ CHECK_EQ(i, 1001);
+ CHECK(RE("(-?\\d+)").FullMatch("-123", &i));
+ CHECK_EQ(i, -123);
+ CHECK(!RE("()\\d+").FullMatch("10", &i));
+ CHECK(!RE("(\\d+)").FullMatch("1234567890123456789012345678901234567890",
+ &i));
+
+ // Digits surrounding integer-arg
+ CHECK(RE("1(\\d*)4").FullMatch("1234", &i));
+ CHECK_EQ(i, 23);
+ CHECK(RE("(\\d)\\d+").FullMatch("1234", &i));
+ CHECK_EQ(i, 1);
+ CHECK(RE("(-\\d)\\d+").FullMatch("-1234", &i));
+ CHECK_EQ(i, -1);
+ CHECK(RE("(\\d)").PartialMatch("1234", &i));
+ CHECK_EQ(i, 1);
+ CHECK(RE("(-\\d)").PartialMatch("-1234", &i));
+ CHECK_EQ(i, -1);
+
+ // String-arg
+ CHECK(RE("h(.*)o").FullMatch("hello", &s));
+ CHECK_EQ(s, string("ell"));
+
+ // StringPiece-arg
+ StringPiece sp;
+ CHECK(RE("(\\w+):(\\d+)").FullMatch("ruby:1234", &sp, &i));
+ CHECK_EQ(sp.size(), 4);
+ CHECK(memcmp(sp.data(), "ruby", 4) == 0);
+ CHECK_EQ(i, 1234);
+
+ // Multi-arg
+ CHECK(RE("(\\w+):(\\d+)").FullMatch("ruby:1234", &s, &i));
+ CHECK_EQ(s, string("ruby"));
+ CHECK_EQ(i, 1234);
+
+ // Ignored arg
+ CHECK(RE("(\\w+)(:)(\\d+)").FullMatch("ruby:1234", &s, (void*)NULL, &i));
+ CHECK_EQ(s, string("ruby"));
+ CHECK_EQ(i, 1234);
+
+ // Type tests
+ {
+ char c;
+ CHECK(RE("(H)ello").FullMatch("Hello", &c));
+ CHECK_EQ(c, 'H');
+ }
+ {
+ unsigned char c;
+ CHECK(RE("(H)ello").FullMatch("Hello", &c));
+ CHECK_EQ(c, static_cast<unsigned char>('H'));
+ }
+ {
+ short v;
+ CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
+ CHECK(RE("(-?\\d+)").FullMatch("-100", &v)); CHECK_EQ(v, -100);
+ CHECK(RE("(-?\\d+)").FullMatch("32767", &v)); CHECK_EQ(v, 32767);
+ CHECK(RE("(-?\\d+)").FullMatch("-32768", &v)); CHECK_EQ(v, -32768);
+ CHECK(!RE("(-?\\d+)").FullMatch("-32769", &v));
+ CHECK(!RE("(-?\\d+)").FullMatch("32768", &v));
+ }
+ {
+ unsigned short v;
+ CHECK(RE("(\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
+ CHECK(RE("(\\d+)").FullMatch("32767", &v)); CHECK_EQ(v, 32767);
+ CHECK(RE("(\\d+)").FullMatch("65535", &v)); CHECK_EQ(v, 65535);
+ CHECK(!RE("(\\d+)").FullMatch("65536", &v));
+ }
+ {
+ int v;
+ static const int max_value = 0x7fffffff;
+ static const int min_value = -max_value - 1;
+ CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
+ CHECK(RE("(-?\\d+)").FullMatch("-100", &v)); CHECK_EQ(v, -100);
+ CHECK(RE("(-?\\d+)").FullMatch("2147483647", &v)); CHECK_EQ(v, max_value);
+ CHECK(RE("(-?\\d+)").FullMatch("-2147483648", &v)); CHECK_EQ(v, min_value);
+ CHECK(!RE("(-?\\d+)").FullMatch("-2147483649", &v));
+ CHECK(!RE("(-?\\d+)").FullMatch("2147483648", &v));
+ }
+ {
+ unsigned int v;
+ static const unsigned int max_value = 0xfffffffful;
+ CHECK(RE("(\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
+ CHECK(RE("(\\d+)").FullMatch("4294967295", &v)); CHECK_EQ(v, max_value);
+ CHECK(!RE("(\\d+)").FullMatch("4294967296", &v));
+ }
+#ifdef HAVE_LONG_LONG
+# if defined(__MINGW__) || defined(__MINGW32__)
+# define LLD "%I64d"
+# define LLU "%I64u"
+# else
+# define LLD "%lld"
+# define LLU "%llu"
+# endif
+ {
+ long long v;
+ static const long long max_value = 0x7fffffffffffffffLL;
+ static const long long min_value = -max_value - 1;
+ char buf[32]; // definitely big enough for a long long
+
+ CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
+ CHECK(RE("(-?\\d+)").FullMatch("-100",&v)); CHECK_EQ(v, -100);
+
+ sprintf(buf, LLD, max_value);
+ CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, max_value);
+
+ sprintf(buf, LLD, min_value);
+ CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, min_value);
+
+ sprintf(buf, LLD, max_value);
+ assert(buf[strlen(buf)-1] != '9');
+ buf[strlen(buf)-1]++;
+ CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
+
+ sprintf(buf, LLD, min_value);
+ assert(buf[strlen(buf)-1] != '9');
+ buf[strlen(buf)-1]++;
+ CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
+ }
+#endif
+#if defined HAVE_UNSIGNED_LONG_LONG && defined HAVE_LONG_LONG
+ {
+ unsigned long long v;
+ long long v2;
+ static const unsigned long long max_value = 0xffffffffffffffffULL;
+ char buf[32]; // definitely big enough for a unsigned long long
+
+ CHECK(RE("(-?\\d+)").FullMatch("100",&v)); CHECK_EQ(v, 100);
+ CHECK(RE("(-?\\d+)").FullMatch("-100",&v2)); CHECK_EQ(v2, -100);
+
+ sprintf(buf, LLU, max_value);
+ CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, max_value);
+
+ assert(buf[strlen(buf)-1] != '9');
+ buf[strlen(buf)-1]++;
+ CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
+ }
+#endif
+ {
+ float v;
+ CHECK(RE("(.*)").FullMatch("100", &v));
+ CHECK(RE("(.*)").FullMatch("-100.", &v));
+ CHECK(RE("(.*)").FullMatch("1e23", &v));
+ }
+ {
+ double v;
+ CHECK(RE("(.*)").FullMatch("100", &v));
+ CHECK(RE("(.*)").FullMatch("-100.", &v));
+ CHECK(RE("(.*)").FullMatch("1e23", &v));
+ }
+
+ // Check that matching is fully anchored
+ CHECK(!RE("(\\d+)").FullMatch("x1001", &i));
+ CHECK(!RE("(\\d+)").FullMatch("1001x", &i));
+ CHECK(RE("x(\\d+)").FullMatch("x1001", &i)); CHECK_EQ(i, 1001);
+ CHECK(RE("(\\d+)x").FullMatch("1001x", &i)); CHECK_EQ(i, 1001);
+
+ // Braces
+ CHECK(RE("[0-9a-f+.-]{5,}").FullMatch("0abcd"));
+ CHECK(RE("[0-9a-f+.-]{5,}").FullMatch("0abcde"));
+ CHECK(!RE("[0-9a-f+.-]{5,}").FullMatch("0abc"));
+
+ // Complicated RE
+ CHECK(RE("foo|bar|[A-Z]").FullMatch("foo"));
+ CHECK(RE("foo|bar|[A-Z]").FullMatch("bar"));
+ CHECK(RE("foo|bar|[A-Z]").FullMatch("X"));
+ CHECK(!RE("foo|bar|[A-Z]").FullMatch("XY"));
+
+ // Check full-match handling (needs '$' tacked on internally)
+ CHECK(RE("fo|foo").FullMatch("fo"));
+ CHECK(RE("fo|foo").FullMatch("foo"));
+ CHECK(RE("fo|foo$").FullMatch("fo"));
+ CHECK(RE("fo|foo$").FullMatch("foo"));
+ CHECK(RE("foo$").FullMatch("foo"));
+ CHECK(!RE("foo\\$").FullMatch("foo$bar"));
+ CHECK(!RE("fo|bar").FullMatch("fox"));
+
+ // Uncomment the following if we change the handling of '$' to
+ // prevent it from matching a trailing newline
+ if (false) {
+ // Check that we don't get bitten by pcre's special handling of a
+ // '\n' at the end of the string matching '$'
+ CHECK(!RE("foo$").PartialMatch("foo\n"));
+ }
+
+ // Number of args
+ int a[16];
+ CHECK(RE("").FullMatch(""));
+
+ memset(a, 0, sizeof(0));
+ CHECK(RE("(\\d){1}").FullMatch("1",
+ &a[0]));
+ CHECK_EQ(a[0], 1);
+
+ memset(a, 0, sizeof(0));
+ CHECK(RE("(\\d)(\\d)").FullMatch("12",
+ &a[0], &a[1]));
+ CHECK_EQ(a[0], 1);
+ CHECK_EQ(a[1], 2);
+
+ memset(a, 0, sizeof(0));
+ CHECK(RE("(\\d)(\\d)(\\d)").FullMatch("123",
+ &a[0], &a[1], &a[2]));
+ CHECK_EQ(a[0], 1);
+ CHECK_EQ(a[1], 2);
+ CHECK_EQ(a[2], 3);
+
+ memset(a, 0, sizeof(0));
+ CHECK(RE("(\\d)(\\d)(\\d)(\\d)").FullMatch("1234",
+ &a[0], &a[1], &a[2], &a[3]));
+ CHECK_EQ(a[0], 1);
+ CHECK_EQ(a[1], 2);
+ CHECK_EQ(a[2], 3);
+ CHECK_EQ(a[3], 4);
+
+ memset(a, 0, sizeof(0));
+ CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("12345",
+ &a[0], &a[1], &a[2],
+ &a[3], &a[4]));
+ CHECK_EQ(a[0], 1);
+ CHECK_EQ(a[1], 2);
+ CHECK_EQ(a[2], 3);
+ CHECK_EQ(a[3], 4);
+ CHECK_EQ(a[4], 5);
+
+ memset(a, 0, sizeof(0));
+ CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("123456",
+ &a[0], &a[1], &a[2],
+ &a[3], &a[4], &a[5]));
+ CHECK_EQ(a[0], 1);
+ CHECK_EQ(a[1], 2);
+ CHECK_EQ(a[2], 3);
+ CHECK_EQ(a[3], 4);
+ CHECK_EQ(a[4], 5);
+ CHECK_EQ(a[5], 6);
+
+ memset(a, 0, sizeof(0));
+ CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("1234567",
+ &a[0], &a[1], &a[2], &a[3],
+ &a[4], &a[5], &a[6]));
+ CHECK_EQ(a[0], 1);
+ CHECK_EQ(a[1], 2);
+ CHECK_EQ(a[2], 3);
+ CHECK_EQ(a[3], 4);
+ CHECK_EQ(a[4], 5);
+ CHECK_EQ(a[5], 6);
+ CHECK_EQ(a[6], 7);
+
+ memset(a, 0, sizeof(0));
+ CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)"
+ "(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch(
+ "1234567890123456",
+ &a[0], &a[1], &a[2], &a[3],
+ &a[4], &a[5], &a[6], &a[7],
+ &a[8], &a[9], &a[10], &a[11],
+ &a[12], &a[13], &a[14], &a[15]));
+ CHECK_EQ(a[0], 1);
+ CHECK_EQ(a[1], 2);
+ CHECK_EQ(a[2], 3);
+ CHECK_EQ(a[3], 4);
+ CHECK_EQ(a[4], 5);
+ CHECK_EQ(a[5], 6);
+ CHECK_EQ(a[6], 7);
+ CHECK_EQ(a[7], 8);
+ CHECK_EQ(a[8], 9);
+ CHECK_EQ(a[9], 0);
+ CHECK_EQ(a[10], 1);
+ CHECK_EQ(a[11], 2);
+ CHECK_EQ(a[12], 3);
+ CHECK_EQ(a[13], 4);
+ CHECK_EQ(a[14], 5);
+ CHECK_EQ(a[15], 6);
+
+ /***** PartialMatch *****/
+
+ printf("Testing PartialMatch\n");
+
+ CHECK(RE("h.*o").PartialMatch("hello"));
+ CHECK(RE("h.*o").PartialMatch("othello"));
+ CHECK(RE("h.*o").PartialMatch("hello!"));
+ CHECK(RE("((((((((((((((((((((x))))))))))))))))))))").PartialMatch("x"));
+
+ /***** other tests *****/
+
+ RadixTests();
+ TestReplace();
+ TestExtract();
+ TestConsume();
+ TestFindAndConsume();
+ TestQuoteMetaAll();
+ TestMatchNumberPeculiarity();
+
+ // Check the pattern() accessor
+ {
+ const string kPattern = "http://([^/]+)/.*";
+ const RE re(kPattern);
+ CHECK_EQ(kPattern, re.pattern());
+ }
+
+ // Check RE error field.
+ {
+ RE re("foo");
+ CHECK(re.error().empty()); // Must have no error
+ }
+
+#ifdef SUPPORT_UTF8
+ // Check UTF-8 handling
+ {
+ printf("Testing UTF-8 handling\n");
+
+ // Three Japanese characters (nihongo)
+ const unsigned char utf8_string[] = {
+ 0xe6, 0x97, 0xa5, // 65e5
+ 0xe6, 0x9c, 0xac, // 627c
+ 0xe8, 0xaa, 0x9e, // 8a9e
+ 0
+ };
+ const unsigned char utf8_pattern[] = {
+ '.',
+ 0xe6, 0x9c, 0xac, // 627c
+ '.',
+ 0
+ };
+
+ // Both should match in either mode, bytes or UTF-8
+ RE re_test1(".........");
+ CHECK(re_test1.FullMatch(utf8_string));
+ RE re_test2("...", pcrecpp::UTF8());
+ CHECK(re_test2.FullMatch(utf8_string));
+
+ // Check that '.' matches one byte or UTF-8 character
+ // according to the mode.
+ string ss;
+ RE re_test3("(.)");
+ CHECK(re_test3.PartialMatch(utf8_string, &ss));
+ CHECK_EQ(ss, string("\xe6"));
+ RE re_test4("(.)", pcrecpp::UTF8());
+ CHECK(re_test4.PartialMatch(utf8_string, &ss));
+ CHECK_EQ(ss, string("\xe6\x97\xa5"));
+
+ // Check that string matches itself in either mode
+ RE re_test5(utf8_string);
+ CHECK(re_test5.FullMatch(utf8_string));
+ RE re_test6(utf8_string, pcrecpp::UTF8());
+ CHECK(re_test6.FullMatch(utf8_string));
+
+ // Check that pattern matches string only in UTF8 mode
+ RE re_test7(utf8_pattern);
+ CHECK(!re_test7.FullMatch(utf8_string));
+ RE re_test8(utf8_pattern, pcrecpp::UTF8());
+ CHECK(re_test8.FullMatch(utf8_string));
+ }
+
+ // Check that ungreedy, UTF8 regular expressions don't match when they
+ // oughtn't -- see bug 82246.
+ {
+ // This code always worked.
+ const char* pattern = "\\w+X";
+ const string target = "a aX";
+ RE match_sentence(pattern);
+ RE match_sentence_re(pattern, pcrecpp::UTF8());
+
+ CHECK(!match_sentence.FullMatch(target));
+ CHECK(!match_sentence_re.FullMatch(target));
+ }
+
+ {
+ const char* pattern = "(?U)\\w+X";
+ const string target = "a aX";
+ RE match_sentence(pattern);
+ RE match_sentence_re(pattern, pcrecpp::UTF8());
+
+ CHECK(!match_sentence.FullMatch(target));
+ CHECK(!match_sentence_re.FullMatch(target));
+ }
+#endif /* def SUPPORT_UTF8 */
+
+ printf("Testing error reporting\n");
+
+ { RE re("a\\1"); CHECK(!re.error().empty()); }
+ {
+ RE re("a[x");
+ CHECK(!re.error().empty());
+ }
+ {
+ RE re("a[z-a]");
+ CHECK(!re.error().empty());
+ }
+ {
+ RE re("a[[:foobar:]]");
+ CHECK(!re.error().empty());
+ }
+ {
+ RE re("a(b");
+ CHECK(!re.error().empty());
+ }
+ {
+ RE re("a\\");
+ CHECK(!re.error().empty());
+ }
+
+ // Test that recursion is stopped
+ TestRecursion();
+
+ // Test Options
+ if (getenv("VERBOSE_TEST") != NULL)
+ VERBOSE_TEST = true;
+ TestOptions();
+
+ // Test the constructors
+ TestConstructors();
+
+ // Done
+ printf("OK\n");
+
+ return 0;
+}
Added: httpd/httpd/vendor/pcre/current/pcrecpparg.h.in
URL: http://svn.apache.org/viewvc/httpd/httpd/vendor/pcre/current/pcrecpparg.h.in?rev=598339&view=auto
==============================================================================
--- httpd/httpd/vendor/pcre/current/pcrecpparg.h.in (added)
+++ httpd/httpd/vendor/pcre/current/pcrecpparg.h.in Mon Nov 26 08:49:53 2007
@@ -0,0 +1,173 @@
+// Copyright (c) 2005, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Author: Sanjay Ghemawat
+
+#ifndef _PCRECPPARG_H
+#define _PCRECPPARG_H
+
+#include <stdlib.h> // for NULL
+#include <string>
+
+#include <pcre.h>
+
+namespace pcrecpp {
+
+class StringPiece;
+
+// Hex/Octal/Binary?
+
+// Adapter used by Arg for destination types that supply their own
+// ParseFrom(const char*, int) member function.
+template <class T>
+class _RE_MatchObject {
+ public:
+  // Has the Arg::Parser signature: dest is the type-erased T* held by Arg;
+  // the n bytes starting at str are handed to dest->ParseFrom() and its
+  // result is returned.
+  static inline bool Parse(const char* str, int n, void* dest) {
+    return static_cast<T*>(dest)->ParseFrom(str, n);
+  }
+};
+
+// Arg is a type-erased output slot: it pairs a destination pointer (arg_)
+// with the parser function (parser_) that Parse() invokes on the matched
+// text and that pointer.
+class PCRECPP_EXP_DEFN Arg {
+ public:
+ // Empty constructor so we can declare arrays of Arg
+ Arg();
+
+ // Constructor specially designed for NULL arguments
+ Arg(void*);
+
+ // Signature shared by every parser: str/n describe the text to parse (the
+ // length is passed explicitly) and dest is the stored destination pointer.
+ typedef bool (*Parser)(const char* str, int n, void* dest);
+
+// Type-specific parsers
+// For each listed type the macro emits two constructors: one that installs
+// the default parser `name`, and one that takes a caller-supplied parser.
+#define PCRE_MAKE_PARSER(type,name) \
+ Arg(type* p) : arg_(p), parser_(name) { } \
+ Arg(type* p, Parser parser) : arg_(p), parser_(parser) { }
+
+
+ PCRE_MAKE_PARSER(char, parse_char);
+ PCRE_MAKE_PARSER(unsigned char, parse_uchar);
+ PCRE_MAKE_PARSER(short, parse_short);
+ PCRE_MAKE_PARSER(unsigned short, parse_ushort);
+ PCRE_MAKE_PARSER(int, parse_int);
+ PCRE_MAKE_PARSER(unsigned int, parse_uint);
+ PCRE_MAKE_PARSER(long, parse_long);
+ PCRE_MAKE_PARSER(unsigned long, parse_ulong);
+// NOTE(review): the @...@ tokens below are placeholders, presumably replaced
+// by the build configuration when this .h.in template is instantiated.
+#if @pcre_have_long_long@
+ PCRE_MAKE_PARSER(long long, parse_longlong);
+#endif
+#if @pcre_have_ulong_long@
+ PCRE_MAKE_PARSER(unsigned long long, parse_ulonglong);
+#endif
+ PCRE_MAKE_PARSER(float, parse_float);
+ PCRE_MAKE_PARSER(double, parse_double);
+ PCRE_MAKE_PARSER(std::string, parse_string);
+ PCRE_MAKE_PARSER(StringPiece, parse_stringpiece);
+
+#undef PCRE_MAKE_PARSER
+
+ // Generic constructor
+ // (declared only; no definition is visible in this header)
+ template <class T> Arg(T*, Parser parser);
+ // Generic constructor template
+ // Used for any other pointer type: parsing is delegated to T::ParseFrom()
+ // through _RE_MatchObject<T>::Parse.
+ template <class T> Arg(T* p)
+ : arg_(p), parser_(_RE_MatchObject<T>::Parse) {
+ }
+
+ // Parse the data
+ // Calls the stored parser with (str, n, arg_) and returns its result.
+ bool Parse(const char* str, int n) const;
+
+ private:
+ void* arg_;       // destination pointer handed to parser_
+ Parser parser_;   // function invoked by Parse()
+
+ // Default parsers referenced by the constructors above; they are only
+ // declared here.
+ static bool parse_null (const char* str, int n, void* dest);
+ static bool parse_char (const char* str, int n, void* dest);
+ static bool parse_uchar (const char* str, int n, void* dest);
+ static bool parse_float (const char* str, int n, void* dest);
+ static bool parse_double (const char* str, int n, void* dest);
+ static bool parse_string (const char* str, int n, void* dest);
+ static bool parse_stringpiece (const char* str, int n, void* dest);
+
+// Declares, for one integer type, the private default and explicit-radix
+// parsers plus the public _hex/_octal/_cradix parsers used by the
+// Hex()/Octal()/CRadix() helpers.  Note that the macro leaves the access
+// level at `public`.
+#define PCRE_DECLARE_INTEGER_PARSER(name) \
+ private: \
+ static bool parse_ ## name(const char* str, int n, void* dest); \
+ static bool parse_ ## name ## _radix( \
+ const char* str, int n, void* dest, int radix); \
+ public: \
+ static bool parse_ ## name ## _hex(const char* str, int n, void* dest); \
+ static bool parse_ ## name ## _octal(const char* str, int n, void* dest); \
+ static bool parse_ ## name ## _cradix(const char* str, int n, void* dest)
+
+ PCRE_DECLARE_INTEGER_PARSER(short);
+ PCRE_DECLARE_INTEGER_PARSER(ushort);
+ PCRE_DECLARE_INTEGER_PARSER(int);
+ PCRE_DECLARE_INTEGER_PARSER(uint);
+ PCRE_DECLARE_INTEGER_PARSER(long);
+ PCRE_DECLARE_INTEGER_PARSER(ulong);
+ PCRE_DECLARE_INTEGER_PARSER(longlong);
+ PCRE_DECLARE_INTEGER_PARSER(ulonglong);
+
+#undef PCRE_DECLARE_INTEGER_PARSER
+};
+
+// Both "no destination" constructors install parse_null as the parser; the
+// void* overload keeps the pointer it was given, the default one stores NULL.
+inline Arg::Arg() : arg_(NULL), parser_(parse_null) { }
+inline Arg::Arg(void* p) : arg_(p), parser_(parse_null) { }
+
+// Forwards the n bytes at str, together with the stored destination pointer,
+// to the stored parser and returns the parser's result.
+inline bool Arg::Parse(const char* str, int n) const {
+ return (*parser_)(str, n, arg_);
+}
+
+// This part of the parser, appropriate only for ints, deals with bases
+// For one integer type the macro defines the free functions Hex(), Octal()
+// and CRadix(); each wraps the pointer in an Arg that uses the matching
+// Arg::parse_<name>_hex / _octal / _cradix parser.
+#define MAKE_INTEGER_PARSER(type, name) \
+ inline Arg Hex(type* ptr) { \
+ return Arg(ptr, Arg::parse_ ## name ## _hex); } \
+ inline Arg Octal(type* ptr) { \
+ return Arg(ptr, Arg::parse_ ## name ## _octal); } \
+ inline Arg CRadix(type* ptr) { \
+ return Arg(ptr, Arg::parse_ ## name ## _cradix); }
+
+MAKE_INTEGER_PARSER(short, short) /* */
+MAKE_INTEGER_PARSER(unsigned short, ushort) /* */
+MAKE_INTEGER_PARSER(int, int) /* Don't use semicolons */
+MAKE_INTEGER_PARSER(unsigned int, uint) /* after these statements */
+MAKE_INTEGER_PARSER(long, long) /* because they can cause */
+MAKE_INTEGER_PARSER(unsigned long, ulong) /* compiler warnings if */
+#if @pcre_have_long_long@ /* the checking level is */
+MAKE_INTEGER_PARSER(long long, longlong) /* turned up high enough. */
+#endif /* */
+#if @pcre_have_ulong_long@ /* */
+MAKE_INTEGER_PARSER(unsigned long long, ulonglong) /* */
+#endif
+
+#undef PCRE_IS_SET
+#undef PCRE_SET_OR_CLEAR
+#undef MAKE_INTEGER_PARSER
+
+} // namespace pcrecpp
+
+
+#endif /* _PCRECPPARG_H */
Modified: httpd/httpd/vendor/pcre/current/pcredemo.c
URL: http://svn.apache.org/viewvc/httpd/httpd/vendor/pcre/current/pcredemo.c?rev=598339&r1=598338&r2=598339&view=diff
==============================================================================
--- httpd/httpd/vendor/pcre/current/pcredemo.c (original)
+++ httpd/httpd/vendor/pcre/current/pcredemo.c Mon Nov 26 08:49:53 2007
@@ -11,7 +11,8 @@
-R/usr/local/lib -lpcre
Replace "/usr/local/include" and "/usr/local/lib" with wherever the include and
-library files for PCRE are installed on your system. Only some operating
+library files for PCRE are installed on your system. You don't need -I and -L
+if PCRE is installed in the standard system libraries. Only some operating
systems (e.g. Solaris) use the -R option.
*/
@@ -117,7 +118,7 @@
*/
default: printf("Matching error %d\n", rc); break;
}
- free(re); /* Release memory used for the compiled pattern */
+ pcre_free(re); /* Release memory used for the compiled pattern */
return 1;
}
@@ -223,8 +224,8 @@
if (!find_all)
{
- free(re); /* Release the memory used for the compiled pattern */
- return 0; /* Finish unless -g was given */
+ pcre_free(re); /* Release the memory used for the compiled pattern */
+ return 0; /* Finish unless -g was given */
}
/* Loop for second and subsequent matches */
@@ -276,7 +277,7 @@
if (rc < 0)
{
printf("Matching error %d\n", rc);
- free(re); /* Release memory used for the compiled pattern */
+ pcre_free(re); /* Release memory used for the compiled pattern */
return 1;
}
@@ -317,7 +318,7 @@
} /* End of loop to find second and subsequent matches */
printf("\n");
-free(re); /* Release memory used for the compiled pattern */
+pcre_free(re); /* Release memory used for the compiled pattern */
return 0;
}
Added: httpd/httpd/vendor/pcre/current/pcregexp.pas
URL: http://svn.apache.org/viewvc/httpd/httpd/vendor/pcre/current/pcregexp.pas?rev=598339&view=auto
==============================================================================
--- httpd/httpd/vendor/pcre/current/pcregexp.pas (added)
+++ httpd/httpd/vendor/pcre/current/pcregexp.pas Mon Nov 26 08:49:53 2007
@@ -0,0 +1,783 @@
+{
+ pcRegExp - Perl compatible regular expressions for Virtual Pascal
+ (c) 2001 Peter S. Voronov aka Chem O'Dun <pe...@yahoo.com>
+
+ Based on PCRE library interface unit for Virtual Pascal.
+ (c) 2001 Alexander Tokarev <dw...@dwalin.ru>
+
+ The current PCRE version is: 3.7
+
+ This software must be distributed as Freeware.
+
+ The PCRE library is written by: Philip Hazel <ph...@cam.ac.uk>
+ Copyright (c) 1997-2004 University of Cambridge
+
+ AngelsHolocaust 4-11-04 updated to use version v5.0
+ (INFO: this is regex-directed, NFA)
+ AH: 9-11-04 - pcre_free: removed var, pcre already gives the ptr, now
+ everything works as it should (no more crashes)
+ -> removed CheckRegExp because pcre handles errors perfectly
+ 10-11-04 - added pcError (errorhandling), pcInit
+ 13-11-04 - removed the ErrorPos = 0 check -> always print erroroffset
+ 17-10-05 - support for \1-\9 backreferences in TpcRegExp.GetReplStr
+ 17-02-06 - added RunTimeOptions: caller can set options while searching
+ 19-02-06 - added SearchOfs(): let PCRE use the complete string and offset
+ into the string itself
+ 20-12-06 - support for version 7.0
+}
+
+{$H+} {$DEFINE PCRE_3_7} {$DEFINE PCRE_5_0} {$DEFINE PCRE_7_0}
+
+Unit pcregexp;
+
+Interface
+
+uses objects;
+
+Type
+ PpcRegExp = ^TpcRegExp;
+// TpcRegExp = object
+ { One compiled regular expression together with the state of the most
+   recent search performed with it. }
+ TpcRegExp = object(TObject)
+ MatchesCount: integer; { return value of the last pcre_exec call; Search and SearchOfs reset it to 0 first }
+ RegExpC, RegExpExt : Pointer; { results of pcre_compile and pcre_study, released in Done }
+ Matches:Pointer; { offsets buffer handed to pcre_exec; allocated in Init as a TMatchArray }
+ RegExp: shortstring; { copy of the pattern text passed to Init }
+ SourceLen: integer; { set by Search (ALen) and SearchOfs (ALen-AOfs); read by GetPostSubStr }
+ PartialMatch : boolean; { true when the last pcre_exec returned PCRE_ERROR_PARTIAL }
+ Error : boolean; { true unless Init ran to completion }
+ ErrorMsg : Pchar; { message pointer filled in by pcre_compile / pcre_study }
+ ErrorPos : integer; { error offset filled in by pcre_compile }
+ RunTimeOptions: Integer; // options which can be set by the caller
+ constructor Init(const ARegExp : shortstring; AOptions : integer; ALocale : Pointer);
+ function Search(AStr: Pchar; ALen : longint) : boolean; virtual;
+ function SearchNext( AStr: Pchar; ALen : longint) : boolean; virtual;
+ function SearchOfs ( AStr: Pchar; ALen, AOfs : longint) : boolean; virtual;
+ function MatchSub(ANom: integer; var Pos, Len : longint) : boolean; virtual;
+ function MatchFull(var Pos, Len : longint) : boolean; virtual;
+ function GetSubStr(ANom: integer; AStr: Pchar) : string; virtual;
+ function GetFullStr(AStr: Pchar) : string; virtual;
+ function GetReplStr(AStr: Pchar; const ARepl: string) : string; virtual;
+ function GetPreSubStr(AStr: Pchar) : string; virtual;
+ function GetPostSubStr(AStr: Pchar) : string; virtual;
+ function ErrorStr : string; virtual;
+ destructor Done; virtual;
+ end;
+
+ function pcGrepMatch(WildCard, aStr: string; AOptions:integer; ALocale : Pointer): Boolean;
+ function pcGrepSub(WildCard, aStr, aRepl: string; AOptions:integer; ALocale : Pointer): string;
+
+ function pcFastGrepMatch(WildCard, aStr: string): Boolean;
+ function pcFastGrepSub(WildCard, aStr, aRepl: string): string;
+
+{$IFDEF PCRE_5_0}
+ function pcGetVersion : pchar;
+{$ENDIF}
+
+ function pcError (var pRegExp : Pointer) : Boolean;
+ function pcInit (const Pattern: Shortstring; CaseSens: Boolean) : Pointer;
+
+Const { Options }
+ PCRE_CASELESS = $0001;
+ PCRE_MULTILINE = $0002;
+ PCRE_DOTALL = $0004;
+ PCRE_EXTENDED = $0008;
+ PCRE_ANCHORED = $0010;
+ PCRE_DOLLAR_ENDONLY = $0020;
+ PCRE_EXTRA = $0040;
+ PCRE_NOTBOL = $0080;
+ PCRE_NOTEOL = $0100;
+ PCRE_UNGREEDY = $0200;
+ PCRE_NOTEMPTY = $0400;
+{$IFDEF PCRE_5_0}
+ PCRE_UTF8 = $0800;
+ PCRE_NO_AUTO_CAPTURE = $1000;
+ PCRE_NO_UTF8_CHECK = $2000;
+ PCRE_AUTO_CALLOUT = $4000;
+ PCRE_PARTIAL = $8000;
+{$ENDIF}
+{$IFDEF PCRE_7_0}
+ PCRE_DFA_SHORTEST = $00010000;
+ PCRE_DFA_RESTART = $00020000;
+ PCRE_FIRSTLINE = $00040000;
+ PCRE_DUPNAMES = $00080000;
+ PCRE_NEWLINE_CR = $00100000;
+ PCRE_NEWLINE_LF = $00200000;
+ PCRE_NEWLINE_CRLF = $00300000;
+ PCRE_NEWLINE_ANY = $00400000;
+ PCRE_NEWLINE_ANYCRLF = $00500000;
+{$ENDIF}
+
+ { Masks of the option bits that may be passed to each PCRE entry point.
+   They are combined with "or", not "+": the PCRE_NEWLINE_xxx values share
+   bits (e.g. PCRE_NEWLINE_CRLF = PCRE_NEWLINE_CR or PCRE_NEWLINE_LF), so
+   adding them carries into unrelated bits and produces a wrong mask. }
+ PCRE_COMPILE_ALLOWED_OPTIONS = PCRE_ANCHORED or PCRE_AUTO_CALLOUT or PCRE_CASELESS or
+   PCRE_DOLLAR_ENDONLY or PCRE_DOTALL or PCRE_EXTENDED or
+   PCRE_EXTRA or PCRE_MULTILINE or PCRE_NO_AUTO_CAPTURE or
+   PCRE_UNGREEDY or PCRE_UTF8 or PCRE_NO_UTF8_CHECK
+   {$IFDEF PCRE_7_0}
+   or PCRE_DUPNAMES or PCRE_FIRSTLINE
+   or PCRE_NEWLINE_CR or PCRE_NEWLINE_LF or PCRE_NEWLINE_CRLF
+   or PCRE_NEWLINE_ANY or PCRE_NEWLINE_ANYCRLF
+   {$ENDIF}
+   ;
+
+ PCRE_EXEC_ALLOWED_OPTIONS = PCRE_ANCHORED or PCRE_NOTBOL or PCRE_NOTEOL or
+   PCRE_NOTEMPTY or PCRE_NO_UTF8_CHECK or PCRE_PARTIAL
+   {$IFDEF PCRE_7_0}
+   or PCRE_NEWLINE_CR or PCRE_NEWLINE_LF or PCRE_NEWLINE_CRLF
+   or PCRE_NEWLINE_ANY or PCRE_NEWLINE_ANYCRLF
+   {$ENDIF}
+   ;
+
+{$IFDEF PCRE_7_0}
+ PCRE_DFA_EXEC_ALLOWED_OPTIONS = PCRE_ANCHORED or PCRE_NOTBOL or PCRE_NOTEOL or
+   PCRE_NOTEMPTY or PCRE_NO_UTF8_CHECK or PCRE_PARTIAL or
+   PCRE_DFA_SHORTEST or PCRE_DFA_RESTART or
+   PCRE_NEWLINE_CR or PCRE_NEWLINE_LF or PCRE_NEWLINE_CRLF or
+   PCRE_NEWLINE_ANY or PCRE_NEWLINE_ANYCRLF;  { terminating ';' was missing }
+{$ENDIF}
+
+{ Exec-time and get/set-time error codes }
+ PCRE_ERROR_NOMATCH = -1;
+ PCRE_ERROR_NULL = -2;
+ PCRE_ERROR_BADOPTION = -3;
+ PCRE_ERROR_BADMAGIC = -4;
+ PCRE_ERROR_UNKNOWN_MODE = -5;
+ PCRE_ERROR_NOMEMORY = -6;
+ PCRE_ERROR_NOSUBSTRING = -7;
+{$IFDEF PCRE_5_0}
+ PCRE_ERROR_MATCHLIMIT = -8;
+ PCRE_ERROR_CALLOUT = -9; { Never used by PCRE itself }
+ PCRE_ERROR_BADUTF8 = -10;
+ PCRE_ERROR_BADUTF8_OFFSET = -11;
+ PCRE_ERROR_PARTIAL = -12;
+ PCRE_ERROR_BADPARTIAL = -13;
+ PCRE_ERROR_INTERNAL = -14;
+ PCRE_ERROR_BADCOUNT = -15;
+{$ENDIF}
+{$IFDEF PCRE_7_0}
+ PCRE_ERROR_DFA_UITEM = -16;
+ PCRE_ERROR_DFA_UCOND = -17;
+ PCRE_ERROR_DFA_UMLIMIT = -18;
+ PCRE_ERROR_DFA_WSSIZE = -19;
+ PCRE_ERROR_DFA_RECURSE = -20;
+ PCRE_ERROR_RECURSIONLIMIT = -21;
+ PCRE_ERROR_NULLWSLIMIT = -22;
+ PCRE_ERROR_BADNEWLINE = -23;
+{$ENDIF}
+
+{ Request types for pcre_fullinfo() }
+
+ PCRE_INFO_OPTIONS = 0;
+ PCRE_INFO_SIZE = 1;
+ PCRE_INFO_CAPTURECOUNT = 2;
+ PCRE_INFO_BACKREFMAX = 3;
+ PCRE_INFO_FIRSTBYTE = 4;
+ PCRE_INFO_FIRSTCHAR = 4; { For backwards compatibility }
+ PCRE_INFO_FIRSTTABLE = 5;
+{$IFDEF PCRE_5_0}
+ PCRE_INFO_LASTLITERAL = 6;
+ PCRE_INFO_NAMEENTRYSIZE = 7;
+ PCRE_INFO_NAMECOUNT = 8;
+ PCRE_INFO_NAMETABLE = 9;
+ PCRE_INFO_STUDYSIZE = 10;
+ PCRE_INFO_DEFAULT_TABLES = 11;
+{$ENDIF PCRE_5_0}
+
+{ Request types for pcre_config() }
+{$IFDEF PCRE_5_0}
+ PCRE_CONFIG_UTF8 = 0;
+ PCRE_CONFIG_NEWLINE = 1;
+ PCRE_CONFIG_LINK_SIZE = 2;
+ PCRE_CONFIG_POSIX_MALLOC_THRESHOLD = 3;
+ PCRE_CONFIG_MATCH_LIMIT = 4;
+ PCRE_CONFIG_STACKRECURSE = 5;
+ PCRE_CONFIG_UNICODE_PROPERTIES = 6;
+{$ENDIF PCRE_5_0}
+{$IFDEF PCRE_7_0}
+ PCRE_CONFIG_MATCH_LIMIT_RECURSION = 7;
+{$ENDIF}
+
+{ Bit flags for the pcre_extra structure }
+{$IFDEF PCRE_5_0}
+ PCRE_EXTRA_STUDY_DATA = $0001;
+ PCRE_EXTRA_MATCH_LIMIT = $0002;
+ PCRE_EXTRA_CALLOUT_DATA = $0004;
+ PCRE_EXTRA_TABLES = $0008;
+{$ENDIF PCRE_5_0}
+{$IFDEF PCRE_7_0}
+ PCRE_EXTRA_MATCH_LIMIT_RECURSION = $0010;
+{$ENDIF}
+
+Const
+// DefaultOptions : integer = 0;
+ DefaultLocaleTable : pointer = nil;
+
+{$IFDEF PCRE_5_0}
+{ The structure for passing additional data to pcre_exec(). This is defined in
+such a way as to be extensible. Always add new fields at the end, in order to
+remain compatible. }
+
+type ppcre_extra = ^tpcre_extra;
+ tpcre_extra = record
+ flags : longint; { Bits for which fields are set }
+ study_data : pointer; { Opaque data from pcre_study() }
+ match_limit : longint; { Maximum number of calls to match() }
+ callout_data : pointer; { Data passed back in callouts }
+ tables : pointer; { Pointer to character tables }
+ match_limit_recursion: longint; { Max recursive calls to match() }
+ end;
+
+type ppcre_callout_block = ^pcre_callout_block;
+ pcre_callout_block = record
+ version,
+ (* ------------------------ Version 0 ------------------------------- *)
+ callout_number : integer;
+ offset_vector : pointer;
+ subject : pchar;
+ subject_length, start_match, current_position, capture_top,
+ capture_last : integer;
+ callout_data : pointer;
+ (* ------------------- Added for Version 1 -------------------------- *)
+ pattern_position, next_item_length : integer;
+ end;
+{$ENDIF PCRE_5_0}
+
+{$OrgName+}
+{$IFDEF VIRTUALPASCAL} {&Cdecl+} {$ENDIF VIRTUALPASCAL}
+
+ { local replacement of external pcre memory management functions }
+ function pcre_malloc( size : integer ) : pointer;
+ procedure pcre_free( {var} p : pointer );
+{$IFDEF PCRE_5_0}
+ const pcre_stack_malloc: function ( size : integer ): pointer = pcre_malloc;
+ pcre_stack_free: procedure ( {var} p : pointer ) = pcre_free;
+ function pcre_callout(var p : ppcre_callout_block) : integer;
+{$ENDIF PCRE_5_0}
+{$IFDEF VIRTUALPASCAL} {&Cdecl-} {$ENDIF VIRTUALPASCAL}
+
+Implementation
+
+Uses strings, collect, messages, dnapp, commands, advance0, stringsx
+ {$IFDEF VIRTUALPASCAL} ,vpsyslow {$ENDIF VIRTUALPASCAL};
+
+Const
+ MAGIC_NUMBER = $50435245; { 'PCRE' }
+ MAX_MATCHES = 90; { changed in 3.5 version; should be divisible by 3, was 64}
+
+Type
+ PMatchArray = ^TMatchArray;
+ TMatchArray = array[0..( MAX_MATCHES * 3 )] of integer;
+
+ PRegExpCollection = ^TRegExpCollection;
+ TRegExpCollection = object(TSortedCollection)
+ MaxRegExp : integer;
+ SearchRegExp : shortstring;
+ CompareModeInsert : boolean;
+ constructor Init(AMaxRegExp:integer);
+ procedure FreeItem(P: Pointer); virtual;
+ function Compare(P1, P2: Pointer): Integer; virtual;
+ function Find(ARegExp:shortstring;var P: PpcRegExp):boolean; virtual;
+ function CheckNew(ARegExp:shortstring):PpcRegExp;virtual;
+ end;
+
+Var
+ PRegExpCache : PRegExpCollection;
+
+
+{$IFDEF VIRTUALPASCAL} {&Cdecl+} {$ENDIF VIRTUALPASCAL}
+
+ { imported original pcre functions }
+
+ function pcre_compile( const pattern : PChar; options : integer;
+ var errorptr : PChar; var erroroffset : integer;
+ const tables : PChar ) : pointer {pcre}; external;
+{$IFDEF PCRE_7_0}
+ function pcre_compile2( const pattern : PChar; options : integer;
+ var errorcodeptr : Integer;
+ var errorptr : PChar; var erroroffset : integer;
+ const tables : PChar ) : pointer {pcre}; external;
+{$ENDIF}
+{$IFDEF PCRE_5_0}
+ function pcre_config( what : integer; where : pointer) : integer; external;
+ function pcre_copy_named_substring( const code : pointer {pcre};
+ const subject : pchar;
+ var ovector : integer;
+ stringcount : integer;
+ const stringname : pchar;
+ var buffer : pchar;
+ size : integer) : integer; external;
+ function pcre_copy_substring( const subject : pchar; var ovector : integer;
+ stringcount, stringnumber : integer;
+ var buffer : pchar; size : integer )
+ : integer; external;
+ function pcre_exec( const argument_re : pointer {pcre};
+ const extra_data : pointer {pcre_extra};
+{$ELSE}
+ function pcre_exec( const external_re : pointer;
+ const external_extra : pointer;
+{$ENDIF}
+ const subject : PChar;
+ length, start_offset, options : integer;
+ offsets : pointer;
+ offsetcount : integer ) : integer; external;
+{$IFDEF PCRE_7_0}
+ function pcre_dfa_exec( const argument_re : pointer {pcre};
+ const extra_data : pointer {pcre_extra};
+ const subject : pchar;
+ length, start_offset, options : integer;
+ offsets : pointer;
+ offsetcount : integer;
+ workspace : pointer;
+ wscount : integer ) : integer; external;
+{$ENDIF}
+{$IFDEF PCRE_5_0}
+ procedure pcre_free_substring( const p : pchar ); external;
+ procedure pcre_free_substring_list( var p : pchar ); external;
+ function pcre_fullinfo( const argument_re : pointer {pcre};
+ const extra_data : pointer {pcre_extra};
+ what : integer;
+ where : pointer ) : integer; external;
+ function pcre_get_named_substring( const code : pointer {pcre};
+ const subject : pchar;
+ var ovector : integer;
+ stringcount : integer;
+ const stringname : pchar;
+ var stringptr : pchar ) : integer; external;
+ function pcre_get_stringnumber( const code : pointer {pcre};
+ const stringname : pchar ) : integer; external;
+ function pcre_get_stringtable_entries( const code : pointer {pcre};
+ const stringname : pchar;
+ var firstptr,
+ lastptr : pchar ) : integer; external;
+ function pcre_get_substring( const subject : pchar; var ovector : integer;
+ stringcount, stringnumber : integer;
+ var stringptr : pchar ) : integer; external;
+ function pcre_get_substring_list( const subject : pchar; var ovector : integer;
+ stringcount : integer;
+ listptr : pointer {const char ***listptr}) : integer; external;
+ function pcre_info( const argument_re : pointer {pcre};
+ var optptr : integer;
+ var first_byte : integer ) : integer; external;
+ function pcre_maketables : pchar; external;
+{$ENDIF}
+{$IFDEF PCRE_7_0}
+ function pcre_refcount( const argument_re : pointer {pcre};
+ adjust : integer ) : pchar; external;
+{$ENDIF}
+ function pcre_study( const external_re : pointer {pcre};
+ options : integer;
+ var errorptr : PChar ) : pointer {pcre_extra}; external;
+{$IFDEF PCRE_5_0}
+ function pcre_version : pchar; external;
+{$ENDIF}
+
+ { Local replacement for PCRE's allocator: returns a block of `size` bytes
+   obtained with GetMem. }
+ function pcre_malloc( size : integer ) : pointer;
+ begin
+ GetMem( result, size );
+ end;
+
+ { Local replacement for PCRE's deallocator: releases p with FreeMem; a nil
+   pointer is ignored.  p is passed by value, so the caller's variable is
+   NOT reset to nil. }
+ procedure pcre_free( {var} p : pointer );
+ begin
+ if (p <> nil) then
+ FreeMem( p, 0 ); { NOTE(review): size is given as 0 - presumably ignored by the memory manager; confirm }
+ {@p := nil;}
+ end;
+
+{$IFDEF PCRE_5_0}
+(* Callout routine for the (?C) pattern item.  It performs no action and
+always yields zero, which the PCRE callout documentation defines as "carry on
+matching as normal".  The result used to be left unassigned, so the caller
+received an undefined value. *)
+
+ function pcre_callout;
+ begin
+   pcre_callout := 0;
+ end;
+{$ENDIF}
+
+{$IFDEF VIRTUALPASCAL} {&Cdecl-} {$ENDIF VIRTUALPASCAL}
+
+// Always include the newest version of the library
+{$IFDEF PCRE_3_7} {$IFNDEF PCRE_5_0} {$IFNDEF PCRE_7_0} {$L pcre37.lib} {$ENDIF PCRE_7_0} {$ENDIF PCRE_5_0} {$ENDIF PCRE_3_7}
+{$IFDEF PCRE_5_0} {$IFNDEF PCRE_7_0} {$L pcre50.lib} {$ENDIF PCRE_7_0} {$ENDIF PCRE_5_0}
+{$IFDEF PCRE_7_0} {$L pcre70.lib} {$ENDIF PCRE_7_0}
+
+{TpcRegExp}
+
+ { Compiles ARegExp with pcre_compile (options masked by
+   PCRE_COMPILE_ALLOWED_OPTIONS, ALocale passed as the character tables),
+   studies it with pcre_study and allocates the offsets buffer.
+   Error stays true if compilation or study fails; ErrorMsg/ErrorPos then
+   hold what PCRE reported. }
+ constructor TpcRegExp.Init(const ARegExp:shortstring; AOptions:integer; ALocale : Pointer);
+ var
+   pRegExp : PChar;
+ begin
+   RegExp:=ARegExp;
+   RegExpC:=nil;
+   RegExpExt:=nil;
+   Matches:=nil;
+   MatchesCount:=0;
+   Error:=true;
+   ErrorMsg:=nil;
+   ErrorPos:=0;
+   RunTimeOptions := 0;
+   { pcre_compile needs a zero-terminated pattern.  A shortstring shorter
+     than 255 characters has room for the terminator in place; a full one
+     must be copied to a temporary heap buffer. }
+   if length(RegExp) < 255 then
+   begin
+     RegExp[length(RegExp)+1]:=#0;
+     pRegExp:=@RegExp[1];
+   end
+   else
+   begin
+     GetMem(pRegExp,length(RegExp)+1);
+     pRegExp:=strpcopy(pRegExp,RegExp);
+   end;
+   RegExpC := pcre_compile( pRegExp,
+                            AOptions and PCRE_COMPILE_ALLOWED_OPTIONS,
+                            ErrorMsg, ErrorPos, ALocale);
+   { The buffer came from GetMem, so release it with FreeMem and the exact
+     allocated size (it was previously handed to StrDispose). }
+   if length(RegExp) = 255 then
+     FreeMem(pRegExp,length(RegExp)+1);
+   if RegExpC = nil then
+     exit;
+   ErrorMsg:=nil;
+   RegExpExt := pcre_study( RegExpC, 0, ErrorMsg );
+   if (RegExpExt = nil) and (ErrorMsg <> nil) then
+   begin
+     pcre_free(RegExpC);
+     RegExpC:=nil;   { otherwise Done would free the same block again }
+     exit;
+   end;
+   GetMem(Matches,SizeOf(TMatchArray));
+   Error:=false;
+ end;
+
+ { Releases everything Init may have allocated: the offsets buffer, the
+   study data and the compiled pattern.  Members that are nil are skipped. }
+ destructor TpcRegExp.Done;
+ begin
+   if Matches <> nil then
+     FreeMem(Matches,SizeOf(TMatchArray));
+   if RegExpExt <> nil then
+     pcre_free(RegExpExt);
+   if RegExpC <> nil then
+     pcre_free(RegExpC);
+ end;
+
+ { Runs pcre_exec on AStr (ALen bytes).  If the previous call produced a
+   match (MatchesCount > 0) the search starts at the second entry of the
+   offsets buffer, i.e. where that match ended; otherwise it starts at 0.
+   Returns true when pcre_exec reports a positive count and records a
+   PCRE_ERROR_PARTIAL result in PartialMatch. }
+ function TpcRegExp.SearchNext( AStr: Pchar; ALen : longint ) : boolean;
+ var
+   ExecOptions, StartOfs : Integer;
+ begin // must handle PCRE_ERROR_PARTIAL here
+   ExecOptions := PCRE_EXEC_ALLOWED_OPTIONS and
+                  (RunTimeOptions or startup.MiscMultiData.cfgRegEx.DefaultOptions);
+   StartOfs := 0;
+   if MatchesCount > 0 then
+     StartOfs := PMatchArray(Matches)^[1];
+   MatchesCount := pcre_exec( RegExpC, RegExpExt, AStr, ALen, StartOfs,
+                              ExecOptions, Matches, MAX_MATCHES );
+   PartialMatch := (MatchesCount = PCRE_ERROR_PARTIAL);
+   SearchNext := (MatchesCount > 0);
+ end;
+
+ { Starts a fresh search of AStr (ALen bytes): clears MatchesCount so that
+   SearchNext begins at offset 0, then records ALen in SourceLen. }
+ function TpcRegExp.Search( AStr: Pchar; ALen : longint):boolean;
+ begin
+ MatchesCount:=0;
+ Search:=SearchNext(AStr,ALen);
+ SourceLen:=ALen;
+ end;
+
+ { Like Search, but passes the whole subject AStr (ALen bytes) to pcre_exec
+   together with the start offset AOfs.
+   NOTE(review): SourceLen is set to ALen-AOfs while the offsets stored by
+   pcre_exec are relative to the start of AStr - confirm this is what
+   GetPostSubStr expects. }
+ function TpcRegExp.SearchOfs( AStr: Pchar; ALen, AOfs: longint ) : boolean;
+ var
+   ExecOptions : Integer;
+ begin
+   ExecOptions := PCRE_EXEC_ALLOWED_OPTIONS and
+                  (RunTimeOptions or startup.MiscMultiData.cfgRegEx.DefaultOptions);
+   MatchesCount := pcre_exec( RegExpC, RegExpExt, AStr, ALen, AOfs,
+                              ExecOptions, Matches, MAX_MATCHES );
+   PartialMatch := (MatchesCount = PCRE_ERROR_PARTIAL);
+   SourceLen := ALen-AOfs;
+   SearchOfs := (MatchesCount > 0);
+ end;
+
+ { Reports where captured group ANom (0 = whole match) of the last search
+   lies in the subject.  On success Pos receives the start offset and Len the
+   length taken from the offsets buffer, and the result is true.  If ANom is
+   negative or not below MatchesCount the result is false and Pos/Len are
+   left untouched. }
+ function TpcRegExp.MatchSub(ANom:integer; var Pos,Len:longint):boolean;
+ var
+   Idx : integer;
+ begin
+   { ANom >= 0 guards against indexing before the start of the buffer;
+     ANom < MatchesCount also implies MatchesCount > 0. }
+   if (ANom >= 0) and (ANom < MatchesCount) then
+   begin
+     Idx:=ANom*2;
+     Pos:=PMatchArray(Matches)^[Idx];
+     Len:=PMatchArray(Matches)^[Idx+1]-Pos;
+     MatchSub:=true;
+   end
+   else
+     MatchSub:=false;
+ end;
+
+ { Position and length of the whole match: MatchSub for group 0. }
+ function TpcRegExp.MatchFull(var Pos,Len:longint):boolean;
+ begin
+ MatchFull:=MatchSub(0,Pos,Len);
+ end;
+
+ { Returns the text of captured group ANom (0 = whole match) copied out of
+   AStr, or '' when MatchSub reports no such group or the group is empty. }
+ function TpcRegExp.GetSubStr(ANom: integer; AStr: Pchar):string;
+ var
+   s: ansistring;
+   pos,len: longint;
+ begin
+   s:='';
+   if MatchSub(ANom, pos, len) then
+     { Skip zero-length groups: s would stay empty and s[1] must not be
+       referenced on an empty string. }
+     if len > 0 then
+     begin
+       setlength(s, len);
+       Move(AStr[pos], s[1], len);
+     end;
+   GetSubStr:=s;
+ end;
+
+ { Returns the part of AStr that precedes the last match, or '' when there is
+   no match or the match starts at offset 0.
+   The first entry of the offsets buffer is the 0-based start of the match,
+   so the prefix is exactly that many characters beginning at AStr[0].  (The
+   earlier code copied start-1 characters from AStr[1], dropping the first
+   character of the subject.) }
+ function TpcRegExp.GetPreSubStr(AStr: Pchar):string;
+ var
+   s: ansistring;
+   l: longint;
+ begin
+   s:='';
+   if (MatchesCount > 0) then
+   begin
+     l:=PMatchArray(Matches)^[0];
+     if l > 0 then
+     begin
+       setlength(s,l);
+       Move(AStr[0],s[1],l);
+     end;
+   end;
+   GetPreSubStr:=s;
+ end;
+
+ { Returns the part of AStr that follows the last match, or '' when there is
+   no match or nothing follows it.
+   The second entry of the offsets buffer is the end offset of the whole
+   match; the remainder is SourceLen minus that offset.  (The earlier code
+   used the end of the highest-numbered group, which may be unset, and copied
+   one byte too many.) }
+ function TpcRegExp.GetPostSubStr(AStr: Pchar):string;
+ var
+   s: ansistring;
+   l, EndOfs: longint;
+ begin
+   s:='';
+   if (MatchesCount > 0) then
+   begin
+     EndOfs:=PMatchArray(Matches)^[1];
+     l:=SourceLen-EndOfs;
+     if l > 0 then
+     begin
+       setlength(s,l);
+       Move(AStr[EndOfs],s[1],l);
+     end;
+   end;
+   GetPostSubStr:=s;
+ end;
+
+
+ { Text of the whole match: GetSubStr for group 0. }
+ function TpcRegExp.GetFullStr(AStr: Pchar):string;
+ begin
+   GetFullStr:=GetSubStr(0,AStr);
+ end;
+
+ { Expands the replacement template ARepl against the last match in AStr.
+     \0          whole match          \1..\9   captured group 1..9
+     \x          the character x itself
+     $&          whole match          $1..$9   captured group 1..9
+     $`          text before match    $'       text after the match
+   A '$' followed by any other character produces nothing (both characters
+   are dropped).  A '\' or '$' that is the very last character of ARepl has
+   nothing to escape and is copied literally; previously it caused the
+   literal text pending before it to be lost. }
+ function TpcRegExp.GetReplStr(AStr: Pchar; const ARepl: string):string;
+ var
+   s: ansistring;
+   l,i,lasti: longint;
+ begin
+   l:=length(ARepl);
+   i:=1;
+   lasti:=1;   { index of the first template character not yet copied to s }
+   s:='';
+   while i <= l do
+   begin
+     case ARepl[i] of
+       '\' :
+         begin
+           if i < l then
+           begin
+             s:=s+copy(ARepl,lasti,i-lasti);
+             {AH 17-10-05 support for POSIX \1-\9 backreferences}
+             case ARepl[i+1] of
+               '0' : s:=s+GetFullStr(AStr);
+               '1'..'9' : s:=s+GetSubStr(ord(ARepl[i+1])-ord('0'),AStr);
+               else s:=s+ARepl[i+1]; // copy the escaped character
+             end;
+             { consume the escaped character only when there was one }
+             inc(i);
+             lasti:=i+1;
+           end;
+         end;
+       '$' :
+         begin
+           if i < l then
+           begin
+             s:=s+copy(ARepl,lasti,i-lasti);
+             case ARepl[i+1] of
+               '&' : s:=s+GetFullStr(AStr);
+               '1'..'9' : s:=s+GetSubStr(ord(ARepl[i+1])-ord('0'),AStr);
+               '`' : s:=s+GetPreSubStr(AStr);
+               #39 : s:=s+GetPostSubStr(AStr);
+             end;
+             inc(i);
+             lasti:=i+1;
+           end;
+         end;
+     end;
+     inc(i);
+   end;
+   if lasti <= {AH 25-10-2004 added =, else l==1 won't work} l then
+     s:=s+copy(ARepl,lasti,l-lasti+1);
+   GetReplStr:=s;
+ end;
+
+ { Returns the stored error message as a Pascal string, or '' when no
+   message is recorded (ErrorMsg is nil, e.g. after a successful Init). }
+ function TpcRegExp.ErrorStr:string;
+ begin
+   if ErrorMsg <> nil then
+     ErrorStr:=StrPas(ErrorMsg)
+   else
+     ErrorStr:='';
+ end;
+
+{TRegExpCollection}
+
+{ Creates the collection via the inherited Init(1,1), stores AMaxRegExp as
+  the entry limit checked by CheckNew, and selects insert-mode comparison. }
+constructor TRegExpCollection.Init(AMaxRegExp: integer);
+begin
+ Inherited Init(1,1);
+ MaxRegExp:=AMaxRegExp;
+ CompareModeInsert:=true;
+end;
+
+{ Destroys one stored TpcRegExp (calls its Done destructor and releases the
+  object); nil items are ignored. }
+procedure TRegExpCollection.FreeItem(P: Pointer);
+begin
+  if P = nil then
+    exit;
+  Dispose(PpcRegExp(P),Done);
+end;
+
+{ Ordering function of the collection, based on stringsx.PasStrCmp.
+  In insert mode (CompareModeInsert = true) the patterns of the two items P1
+  and P2 are compared.  In lookup mode P2 is not used; the pattern of P1 is
+  compared with SearchRegExp, which Find sets before searching. }
+function TRegExpCollection.Compare(P1, P2: Pointer): Integer;
+begin
+  if not CompareModeInsert then
+    Compare := stringsx.PasStrCmp(PpcRegExp(P1)^.RegExp, SearchRegExp, False)
+  else
+    Compare := stringsx.PasStrCmp(PpcRegExp(P1)^.RegExp, PpcRegExp(P2)^.RegExp, False);
+end;
+
+// Looks up the entry whose RegExp text matches ARegExp.
+//   ARegExp - pattern text to look for.
+//   P       - receives the matching entry, or nil when none is found.
+// Returns true when an entry was found.
+// The pattern is handed to Compare through the SearchRegExp field, and
+// Compare is switched to search mode for the duration of the Search call.
+function TRegExpCollection.Find(ARegExp:shortstring;var P: PpcRegExp):boolean;
+var
+  Idx : integer;
+  Hit : boolean;
+begin
+  SearchRegExp := ARegExp;
+  CompareModeInsert := false;
+  // nil key: in search mode Compare reads SearchRegExp instead of the key.
+  Hit := Search(nil, Idx);
+  // Back to insert mode before anything else can trigger a comparison.
+  CompareModeInsert := true;
+
+  if Hit then
+    P := PpcRegExp(At(Idx))
+  else
+    P := nil;
+  Result := Hit;
+end;
+
+// Returns the cached TpcRegExp for ARegExp, creating and inserting one when
+// it is not in the collection yet.
+// New entries are always built with PCRE_CASELESS and a nil locale table.
+// When the collection already holds MaxRegExp entries, the item at index 0
+// is freed first to make room.
+// NOTE(review): index 0 is simply the first item in the collection's own
+// ordering, which is not necessarily the oldest entry - confirm intent.
+function TRegExpCollection.CheckNew(ARegExp:shortstring):PpcRegExp;
+var
+  Cached : PpcRegExp;
+begin
+  if Find(ARegExp, Cached) then
+  begin
+    CheckNew := Cached;
+    Exit;
+  end;
+
+  if Count = MaxRegExp then
+    AtFree(0);
+  New(Cached, Init(ARegExp, PCRE_CASELESS, nil));
+  Insert(Cached);
+  CheckNew := Cached;
+end;
+
+// One-shot match: builds a temporary TpcRegExp from WildCard, runs Search
+// on aStr, and disposes the object again.
+//   WildCard - pattern text passed to TpcRegExp.Init.
+//   aStr     - subject string.
+//   AOptions - option flags passed to TpcRegExp.Init.
+//   ALocale  - locale table pointer passed to TpcRegExp.Init.
+// Returns the result of Search.
+function pcGrepMatch(WildCard, aStr: string; AOptions:integer; ALocale : Pointer): Boolean;
+var
+  Re : PpcRegExp;
+begin
+  New(Re, Init(WildCard, AOptions, ALocale));
+  Result := Re^.Search(pchar(AStr), Length(AStr));
+  Dispose(Re, Done);
+end;
+
+// One-shot substitution: builds a temporary TpcRegExp from WildCard and
+// searches aStr with it.
+//   WildCard - pattern text passed to TpcRegExp.Init.
+//   aStr     - subject string.
+//   aRepl    - replacement template passed to GetReplStr.
+//   AOptions - option flags passed to TpcRegExp.Init.
+//   ALocale  - locale table pointer passed to TpcRegExp.Init.
+// Returns GetReplStr's result when the search succeeds, otherwise the
+// empty string. The temporary object is disposed in both cases.
+function pcGrepSub(WildCard, aStr, aRepl: string; AOptions:integer; ALocale : Pointer): string;
+var
+  Re : PpcRegExp;
+begin
+  New(Re, Init(WildCard, AOptions, ALocale));
+  Result := '';
+  if Re^.Search(pchar(AStr), Length(AStr)) then
+    Result := Re^.GetReplStr(pchar(AStr), ARepl);
+  Dispose(Re, Done);
+end;
+
+// Cached variant of pcGrepMatch: the TpcRegExp for WildCard is obtained
+// from PRegExpCache (created on demand by CheckNew) and is not disposed
+// here, because the cache keeps it. Returns the result of Search on aStr.
+function pcFastGrepMatch(WildCard, aStr: string): Boolean;
+begin
+  Result := PRegExpCache^.CheckNew(WildCard)^.Search(pchar(AStr), Length(AStr));
+end;
+
+// Cached variant of pcGrepSub: the TpcRegExp for WildCard comes from
+// PRegExpCache and is left there afterwards.
+// Returns GetReplStr(aStr, aRepl) when the search succeeds, otherwise the
+// empty string.
+function pcFastGrepSub(WildCard, aStr, aRepl: string): string;
+var
+  Re : PpcRegExp;
+begin
+  Re := PRegExpCache^.CheckNew(WildCard);
+  if not Re^.Search(pchar(AStr), Length(AStr)) then
+    Result := ''
+  else
+    Result := Re^.GetReplStr(pchar(AStr), ARepl);
+end;
+
+{$IFDEF PCRE_5_0}
+// Thin wrapper around pcre_version, only compiled when PCRE_5_0 is defined.
+// The routine is pure assembler and its whole body is the single call below.
+// NOTE(review): presumably the callee's return register is passed through
+// unchanged as this function's pchar result - confirm against the calling
+// convention used for pcre_version.
+function pcGetVersion : pchar; assembler; {$FRAME-}{$USES none}
+asm
+ call pcre_version
+end;
+{$ENDIF PCRE_5_0}
+
+// Checks a regular-expression object for a compile error.
+// The parameter list is declared elsewhere; the local Re is overlaid
+// (absolute) on the pRegExp parameter and viewed as a ppcRegExp.
+// Returns True when the pointer is nil or the object's Error is set.
+// When the object exists and reports an error, a message box is shown with
+// the error text and position, the object is disposed, and the overlaid
+// pointer is set to nil.
+// NOTE(review): whether the nil assignment reaches the caller depends on
+// pRegExp being a var parameter - confirm in the declaration.
+function pcError;
+var Re: ppcRegExp absolute pRegExp;
+begin
+  if Re = nil then
+  begin
+    Result := True;
+    Exit;
+  end;
+
+  Result := Re^.Error;
+  if Result then
+  begin
+    // An alternative message without the position part (for ErrorPos = 0)
+    // used to exist here and was disabled by the original author.
+    MessageBox(GetString(erRegExpCompile)+'"'+Re^.ErrorStr+'"'+GetString(erRegExpCompPos),
+      @Re^.ErrorPos, mfConfirmation+mfOkButton);
+    Dispose(Re, Done);
+    Re := nil;
+  end;
+end;
+
+// Creates a new TpcRegExp for Pattern (parameter list declared elsewhere).
+// The options are the configured defaults
+// (startup.MiscMultiData.cfgRegEx.DefaultOptions) combined with
+// PCRE_CASELESS unless CaseSens is set; DefaultLocaleTable is passed as the
+// locale table. The caller receives the newly allocated object.
+function pcInit;
+var CaseFlag : Integer;
+begin
+  CaseFlag := PCRE_CASELESS;
+  if CaseSens then
+    CaseFlag := 0;
+  Result := New(PpcRegExp,
+                Init(Pattern,
+                     startup.MiscMultiData.cfgRegEx.DefaultOptions or CaseFlag,
+                     DefaultLocaleTable));
+end;
+
+// Unit start-up: create the shared regexp cache used by pcFastGrepMatch and
+// pcFastGrepSub. 64 is passed as AMaxRegExp, the entry count at which
+// CheckNew frees an item before inserting a new one.
+Initialization
+  New(PRegExpCache, Init(64));
+// Unit shutdown: dispose the cache through its Done destructor.
+Finalization
+  Dispose(PRegExpCache, Done);
+End.