You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@tomcat.apache.org by rj...@apache.org on 2017/01/02 21:21:09 UTC

svn commit: r1777023 [5/12] - in /tomcat/jk/trunk/native/iis/pcre: ./ doc/ doc/html/ sljit/ testdata/

Modified: tomcat/jk/trunk/native/iis/pcre/doc/pcreapi.3
URL: http://svn.apache.org/viewvc/tomcat/jk/trunk/native/iis/pcre/doc/pcreapi.3?rev=1777023&r1=1777022&r2=1777023&view=diff
==============================================================================
--- tomcat/jk/trunk/native/iis/pcre/doc/pcreapi.3 (original)
+++ tomcat/jk/trunk/native/iis/pcre/doc/pcreapi.3 Mon Jan  2 21:21:08 2017
@@ -1,4 +1,4 @@
-.TH PCREAPI 3 "09 February 2014" "PCRE 8.35"
+.TH PCREAPI 3 "18 December 2015" "PCRE 8.39"
 .SH NAME
 PCRE - Perl-compatible regular expressions
 .sp
@@ -273,9 +273,8 @@ documentation for details of how to do t
 building PCRE, for use in environments that have limited stacks. Because of the
 greater use of memory management, it runs more slowly. Separate functions are
 provided so that special-purpose external code can be used for this case. When
-used, these functions are always called in a stack-like manner (last obtained,
-first freed), and always for memory blocks of the same size. There is a
-discussion about PCRE's stack usage in the
+used, these functions always allocate memory blocks of the same size. There is
+a discussion about PCRE's stack usage in the
 .\" HREF
 \fBpcrestack\fP
 .\"
@@ -2914,6 +2913,6 @@ Cambridge CB2 3QH, England.
 .rs
 .sp
 .nf
-Last updated: 09 February 2014
-Copyright (c) 1997-2014 University of Cambridge.
+Last updated: 18 December 2015
+Copyright (c) 1997-2015 University of Cambridge.
 .fi

Modified: tomcat/jk/trunk/native/iis/pcre/pcre.h.generic
URL: http://svn.apache.org/viewvc/tomcat/jk/trunk/native/iis/pcre/pcre.h.generic?rev=1777023&r1=1777022&r2=1777023&view=diff
==============================================================================
--- tomcat/jk/trunk/native/iis/pcre/pcre.h.generic (original)
+++ tomcat/jk/trunk/native/iis/pcre/pcre.h.generic Mon Jan  2 21:21:08 2017
@@ -42,9 +42,9 @@ POSSIBILITY OF SUCH DAMAGE.
 /* The current PCRE version information. */
 
 #define PCRE_MAJOR          8
-#define PCRE_MINOR          38
+#define PCRE_MINOR          39
 #define PCRE_PRERELEASE     
-#define PCRE_DATE           2015-11-23
+#define PCRE_DATE           2016-06-14
 
 /* When an application links to a PCRE DLL in Windows, the symbols that are
 imported have to be identified as such. When building PCRE, the appropriate

Modified: tomcat/jk/trunk/native/iis/pcre/pcre_compile.c
URL: http://svn.apache.org/viewvc/tomcat/jk/trunk/native/iis/pcre/pcre_compile.c?rev=1777023&r1=1777022&r2=1777023&view=diff
==============================================================================
--- tomcat/jk/trunk/native/iis/pcre/pcre_compile.c (original)
+++ tomcat/jk/trunk/native/iis/pcre/pcre_compile.c Mon Jan  2 21:21:08 2017
@@ -6,7 +6,7 @@
 and semantics are as close as possible to those of the Perl 5 language.
 
                        Written by Philip Hazel
-           Copyright (c) 1997-2014 University of Cambridge
+           Copyright (c) 1997-2016 University of Cambridge
 
 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@@ -485,7 +485,7 @@ static const char error_texts[] =
   "lookbehind assertion is not fixed length\0"
   "malformed number or name after (?(\0"
   "conditional group contains more than two branches\0"
-  "assertion expected after (?(\0"
+  "assertion expected after (?( or (?(?C)\0"
   "(?R or (?[+-]digits must be followed by )\0"
   /* 30 */
   "unknown POSIX class name\0"
@@ -560,6 +560,7 @@ static const char error_texts[] =
   /* 85 */
   "parentheses are too deeply nested (stack check)\0"
   "digits missing in \\x{} or \\o{}\0"
+  "regular expression is too complicated\0"
   ;
 
 /* Table to identify digits and hex digits. This is used when compiling
@@ -4566,6 +4567,10 @@ for (;; ptr++)
   pcre_uint32 ec;
   pcre_uchar mcbuffer[8];
 
+  /* Come here to restart the loop without advancing the pointer. */
+
+  REDO_LOOP:
+
   /* Get next character in the pattern */
 
   c = *ptr;
@@ -4591,7 +4596,8 @@ for (;; ptr++)
     if (code > cd->start_workspace + cd->workspace_size -
         WORK_SIZE_SAFETY_MARGIN)                       /* Check for overrun */
       {
-      *errorcodeptr = ERR52;
+      *errorcodeptr = (code >= cd->start_workspace + cd->workspace_size)?
+        ERR52 : ERR87;
       goto FAILED;
       }
 
@@ -4645,9 +4651,10 @@ for (;; ptr++)
     goto FAILED;
     }
 
-  /* If in \Q...\E, check for the end; if not, we have a literal */
+  /* If in \Q...\E, check for the end; if not, we have a literal. Otherwise an
+  isolated \E is ignored. */
 
-  if (inescq && c != CHAR_NULL)
+  if (c != CHAR_NULL)
     {
     if (c == CHAR_BACKSLASH && ptr[1] == CHAR_E)
       {
@@ -4655,7 +4662,7 @@ for (;; ptr++)
       ptr++;
       continue;
       }
-    else
+    else if (inescq)
       {
       if (previous_callout != NULL)
         {
@@ -4670,18 +4677,27 @@ for (;; ptr++)
         }
       goto NORMAL_CHAR;
       }
-    /* Control does not reach here. */
+
+    /* Check for the start of a \Q...\E sequence. We must do this here rather
+    than later in case it is immediately followed by \E, which turns it into a
+    "do nothing" sequence. */
+
+    if (c == CHAR_BACKSLASH && ptr[1] == CHAR_Q)
+      {
+      inescq = TRUE;
+      ptr++;
+      continue;
+      }
     }
 
-  /* In extended mode, skip white space and comments. We need a loop in order
-  to check for more white space and more comments after a comment. */
+  /* In extended mode, skip white space and comments. */
 
   if ((options & PCRE_EXTENDED) != 0)
     {
-    for (;;)
+    const pcre_uchar *wscptr = ptr;
+    while (MAX_255(c) && (cd->ctypes[c] & ctype_space) != 0) c = *(++ptr);
+    if (c == CHAR_NUMBER_SIGN)
       {
-      while (MAX_255(c) && (cd->ctypes[c] & ctype_space) != 0) c = *(++ptr);
-      if (c != CHAR_NUMBER_SIGN) break;
       ptr++;
       while (*ptr != CHAR_NULL)
         {
@@ -4695,8 +4711,29 @@ for (;; ptr++)
         if (utf) FORWARDCHAR(ptr);
 #endif
         }
-      c = *ptr;     /* Either NULL or the char after a newline */
       }
+
+    /* If we skipped any characters, restart the loop. Otherwise, we didn't see
+    a comment. */
+
+    if (ptr > wscptr) goto REDO_LOOP;
+    }
+
+  /* Skip over (?# comments. We need to do this here because we want to know if
+  the next thing is a quantifier, and these comments may come between an item
+  and its quantifier. */
+
+  if (c == CHAR_LEFT_PARENTHESIS && ptr[1] == CHAR_QUESTION_MARK &&
+      ptr[2] == CHAR_NUMBER_SIGN)
+    {
+    ptr += 3;
+    while (*ptr != CHAR_NULL && *ptr != CHAR_RIGHT_PARENTHESIS) ptr++;
+    if (*ptr == CHAR_NULL)
+      {
+      *errorcodeptr = ERR18;
+      goto FAILED;
+      }
+    continue;
     }
 
   /* See if the next thing is a quantifier. */
@@ -4820,15 +4857,15 @@ for (;; ptr++)
     if (STRNCMP_UC_C8(ptr+1, STRING_WEIRD_STARTWORD, 6) == 0)
       {
       nestptr = ptr + 7;
-      ptr = sub_start_of_word - 1;
-      continue;
+      ptr = sub_start_of_word;
+      goto REDO_LOOP;
       }
 
     if (STRNCMP_UC_C8(ptr+1, STRING_WEIRD_ENDWORD, 6) == 0)
       {
       nestptr = ptr + 7;
-      ptr = sub_end_of_word - 1;
-      continue;
+      ptr = sub_end_of_word;
+      goto REDO_LOOP;
       }
 
     /* Handle a real character class. */
@@ -5046,20 +5083,22 @@ for (;; ptr++)
             ptr = tempptr + 1;
             continue;
 
-            /* For the other POSIX classes (ascii, xdigit) we are going to fall
-            through to the non-UCP case and build a bit map for characters with
-            code points less than 256. If we are in a negated POSIX class
-            within a non-negated overall class, characters with code points
-            greater than 255 must all match. In the special case where we have
-            not yet generated any xclass data, and this is the final item in
-            the overall class, we need do nothing: later on, the opcode
+            /* For the other POSIX classes (ascii, cntrl, xdigit) we are going
+            to fall through to the non-UCP case and build a bit map for
+            characters with code points less than 256. If we are in a negated
+            POSIX class, characters with code points greater than 255 must
+            either all match or all not match. In the special case where we
+            have not yet generated any xclass data, and this is the final item
+            in the overall class, we need do nothing: later on, the opcode
             OP_NCLASS will be used to indicate that characters greater than 255
             are acceptable. If we have already seen an xclass item or one may
             follow (we have to assume that it might if this is not the end of
-            the class), explicitly match all wide codepoints. */
+            the class), explicitly list all wide codepoints, which will then
+            either not match or match, depending on whether the class is or is
+            not negated. */
 
             default:
-            if (!negate_class && local_negate &&
+            if (local_negate &&
                 (xclass || tempptr[2] != CHAR_RIGHT_SQUARE_BRACKET))
               {
               *class_uchardata++ = XCL_RANGE;
@@ -6529,21 +6568,6 @@ for (;; ptr++)
     case CHAR_LEFT_PARENTHESIS:
     ptr++;
 
-    /* First deal with comments. Putting this code right at the start ensures
-    that comments have no bad side effects. */
-
-    if (ptr[0] == CHAR_QUESTION_MARK && ptr[1] == CHAR_NUMBER_SIGN)
-      {
-      ptr += 2;
-      while (*ptr != CHAR_NULL && *ptr != CHAR_RIGHT_PARENTHESIS) ptr++;
-      if (*ptr == CHAR_NULL)
-        {
-        *errorcodeptr = ERR18;
-        goto FAILED;
-        }
-      continue;
-      }
-
     /* Now deal with various "verbs" that can be introduced by '*'. */
 
     if (ptr[0] == CHAR_ASTERISK && (ptr[1] == ':'
@@ -6604,8 +6628,21 @@ for (;; ptr++)
             cd->had_accept = TRUE;
             for (oc = cd->open_caps; oc != NULL; oc = oc->next)
               {
-              *code++ = OP_CLOSE;
-              PUT2INC(code, 0, oc->number);
+              if (lengthptr != NULL)
+                {
+#ifdef COMPILE_PCRE8
+                *lengthptr += 1 + IMM2_SIZE;
+#elif defined COMPILE_PCRE16
+                *lengthptr += 2 + IMM2_SIZE;
+#elif defined COMPILE_PCRE32
+                *lengthptr += 4 + IMM2_SIZE;
+#endif
+                }
+              else
+                {
+                *code++ = OP_CLOSE;
+                PUT2INC(code, 0, oc->number);
+                }
               }
             setverb = *code++ =
               (cd->assert_depth > 0)? OP_ASSERT_ACCEPT : OP_ACCEPT;
@@ -6734,6 +6771,15 @@ for (;; ptr++)
           for (i = 3;; i++) if (!IS_DIGIT(ptr[i])) break;
           if (ptr[i] == CHAR_RIGHT_PARENTHESIS)
             tempptr += i + 1;
+
+          /* tempptr should now be pointing to the opening parenthesis of the
+          assertion condition. */
+
+          if (*tempptr != CHAR_LEFT_PARENTHESIS)
+            {
+            *errorcodeptr = ERR28;
+            goto FAILED;
+            }
           }
 
         /* For conditions that are assertions, check the syntax, and then exit
@@ -7258,7 +7304,7 @@ for (;; ptr++)
           issue is fixed "properly" in PCRE2. As PCRE1 is now in maintenance
           only mode, we finesse the bug by allowing more memory always. */
 
-          *lengthptr += 2 + 2*LINK_SIZE;
+          *lengthptr += 4 + 4*LINK_SIZE;
 
           /* It is even worse than that. The current reference may be to an
           existing named group with a different number (so apparently not
@@ -7274,7 +7320,12 @@ for (;; ptr++)
           so far in order to get the number. If the name is not found, leave
           the value of recno as 0 for a forward reference. */
 
-          else
+          /* This patch (removing "else") fixes a problem when a reference is
+          to multiple identically named nested groups from within the nest.
+          Once again, it is not the "proper" fix, and it results in an
+          over-allocation of memory. */
+
+          /* else */
             {
             ng = cd->named_groups;
             for (i = 0; i < cd->names_found; i++, ng++)
@@ -7585,39 +7636,15 @@ for (;; ptr++)
         newoptions = (options | set) & (~unset);
 
         /* If the options ended with ')' this is not the start of a nested
-        group with option changes, so the options change at this level. If this
-        item is right at the start of the pattern, the options can be
-        abstracted and made external in the pre-compile phase, and ignored in
-        the compile phase. This can be helpful when matching -- for instance in
-        caseless checking of required bytes.
-
-        If the code pointer is not (cd->start_code + 1 + LINK_SIZE), we are
-        definitely *not* at the start of the pattern because something has been
-        compiled. In the pre-compile phase, however, the code pointer can have
-        that value after the start, because it gets reset as code is discarded
-        during the pre-compile. However, this can happen only at top level - if
-        we are within parentheses, the starting BRA will still be present. At
-        any parenthesis level, the length value can be used to test if anything
-        has been compiled at that level. Thus, a test for both these conditions
-        is necessary to ensure we correctly detect the start of the pattern in
-        both phases.
-
+        group with option changes, so the options change at this level.
         If we are not at the pattern start, reset the greedy defaults and the
         case value for firstchar and reqchar. */
 
         if (*ptr == CHAR_RIGHT_PARENTHESIS)
           {
-          if (code == cd->start_code + 1 + LINK_SIZE &&
-               (lengthptr == NULL || *lengthptr == 2 + 2*LINK_SIZE))
-            {
-            cd->external_options = newoptions;
-            }
-          else
-            {
-            greedy_default = ((newoptions & PCRE_UNGREEDY) != 0);
-            greedy_non_default = greedy_default ^ 1;
-            req_caseopt = ((newoptions & PCRE_CASELESS) != 0)? REQ_CASELESS:0;
-            }
+          greedy_default = ((newoptions & PCRE_UNGREEDY) != 0);
+          greedy_non_default = greedy_default ^ 1;
+          req_caseopt = ((newoptions & PCRE_CASELESS) != 0)? REQ_CASELESS:0;
 
           /* Change options at this level, and pass them back for use
           in subsequent branches. */
@@ -7896,16 +7923,6 @@ for (;; ptr++)
       c = ec;
     else
       {
-      if (escape == ESC_Q)            /* Handle start of quoted string */
-        {
-        if (ptr[1] == CHAR_BACKSLASH && ptr[2] == CHAR_E)
-          ptr += 2;               /* avoid empty string */
-            else inescq = TRUE;
-        continue;
-        }
-
-      if (escape == ESC_E) continue;  /* Perl ignores an orphan \E */
-
       /* For metasequences that actually match a character, we disable the
       setting of a first character if it hasn't already been set. */
 

Modified: tomcat/jk/trunk/native/iis/pcre/pcre_get.c
URL: http://svn.apache.org/viewvc/tomcat/jk/trunk/native/iis/pcre/pcre_get.c?rev=1777023&r1=1777022&r2=1777023&view=diff
==============================================================================
--- tomcat/jk/trunk/native/iis/pcre/pcre_get.c (original)
+++ tomcat/jk/trunk/native/iis/pcre/pcre_get.c Mon Jan  2 21:21:08 2017
@@ -250,6 +250,7 @@ Arguments:
   code         the compiled regex
   stringname   the name of the capturing substring
   ovector      the vector of matched substrings
+  stringcount  number of captured substrings
 
 Returns:       the number of the first that is set,
                or the number of the last one if none are set,
@@ -258,13 +259,16 @@ Returns:       the number of the first t
 
 #if defined COMPILE_PCRE8
 static int
-get_first_set(const pcre *code, const char *stringname, int *ovector)
+get_first_set(const pcre *code, const char *stringname, int *ovector,
+  int stringcount)
 #elif defined COMPILE_PCRE16
 static int
-get_first_set(const pcre16 *code, PCRE_SPTR16 stringname, int *ovector)
+get_first_set(const pcre16 *code, PCRE_SPTR16 stringname, int *ovector,
+  int stringcount)
 #elif defined COMPILE_PCRE32
 static int
-get_first_set(const pcre32 *code, PCRE_SPTR32 stringname, int *ovector)
+get_first_set(const pcre32 *code, PCRE_SPTR32 stringname, int *ovector,
+  int stringcount)
 #endif
 {
 const REAL_PCRE *re = (const REAL_PCRE *)code;
@@ -295,7 +299,7 @@ if (entrysize <= 0) return entrysize;
 for (entry = (pcre_uchar *)first; entry <= (pcre_uchar *)last; entry += entrysize)
   {
   int n = GET2(entry, 0);
-  if (ovector[n*2] >= 0) return n;
+  if (n < stringcount && ovector[n*2] >= 0) return n;
   }
 return GET2(entry, 0);
 }
@@ -402,7 +406,7 @@ pcre32_copy_named_substring(const pcre32
   PCRE_UCHAR32 *buffer, int size)
 #endif
 {
-int n = get_first_set(code, stringname, ovector);
+int n = get_first_set(code, stringname, ovector, stringcount);
 if (n <= 0) return n;
 #if defined COMPILE_PCRE8
 return pcre_copy_substring(subject, ovector, stringcount, n, buffer, size);
@@ -457,7 +461,10 @@ pcre_uchar **stringlist;
 pcre_uchar *p;
 
 for (i = 0; i < double_count; i += 2)
-  size += sizeof(pcre_uchar *) + IN_UCHARS(ovector[i+1] - ovector[i] + 1);
+  {
+  size += sizeof(pcre_uchar *) + IN_UCHARS(1);
+  if (ovector[i+1] > ovector[i]) size += IN_UCHARS(ovector[i+1] - ovector[i]);
+  }
 
 stringlist = (pcre_uchar **)(PUBL(malloc))(size);
 if (stringlist == NULL) return PCRE_ERROR_NOMEMORY;
@@ -473,7 +480,7 @@ p = (pcre_uchar *)(stringlist + stringco
 
 for (i = 0; i < double_count; i += 2)
   {
-  int len = ovector[i+1] - ovector[i];
+  int len = (ovector[i+1] > ovector[i])? (ovector[i+1] - ovector[i]) : 0;
   memcpy(p, subject + ovector[i], IN_UCHARS(len));
   *stringlist++ = p;
   p += len;
@@ -619,7 +626,7 @@ pcre32_get_named_substring(const pcre32
   PCRE_SPTR32 *stringptr)
 #endif
 {
-int n = get_first_set(code, stringname, ovector);
+int n = get_first_set(code, stringname, ovector, stringcount);
 if (n <= 0) return n;
 #if defined COMPILE_PCRE8
 return pcre_get_substring(subject, ovector, stringcount, n, stringptr);

Modified: tomcat/jk/trunk/native/iis/pcre/pcre_internal.h
URL: http://svn.apache.org/viewvc/tomcat/jk/trunk/native/iis/pcre/pcre_internal.h?rev=1777023&r1=1777022&r2=1777023&view=diff
==============================================================================
--- tomcat/jk/trunk/native/iis/pcre/pcre_internal.h (original)
+++ tomcat/jk/trunk/native/iis/pcre/pcre_internal.h Mon Jan  2 21:21:08 2017
@@ -7,7 +7,7 @@
 and semantics are as close as possible to those of the Perl 5 language.
 
                        Written by Philip Hazel
-           Copyright (c) 1997-2014 University of Cambridge
+           Copyright (c) 1997-2016 University of Cambridge
 
 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@@ -275,7 +275,7 @@ pcre.h(.in) and disable (comment out) th
 
 typedef pcre_uint16 pcre_uchar;
 #define UCHAR_SHIFT (1)
-#define IN_UCHARS(x) ((x) << UCHAR_SHIFT)
+#define IN_UCHARS(x) ((x) * 2)
 #define MAX_255(c) ((c) <= 255u)
 #define TABLE_GET(c, table, default) (MAX_255(c)? ((table)[c]):(default))
 
@@ -283,7 +283,7 @@ typedef pcre_uint16 pcre_uchar;
 
 typedef pcre_uint32 pcre_uchar;
 #define UCHAR_SHIFT (2)
-#define IN_UCHARS(x) ((x) << UCHAR_SHIFT)
+#define IN_UCHARS(x) ((x) * 4)
 #define MAX_255(c) ((c) <= 255u)
 #define TABLE_GET(c, table, default) (MAX_255(c)? ((table)[c]):(default))
 
@@ -2289,7 +2289,7 @@ enum { ERR0,  ERR1,  ERR2,  ERR3,  ERR4,
        ERR50, ERR51, ERR52, ERR53, ERR54, ERR55, ERR56, ERR57, ERR58, ERR59,
        ERR60, ERR61, ERR62, ERR63, ERR64, ERR65, ERR66, ERR67, ERR68, ERR69,
        ERR70, ERR71, ERR72, ERR73, ERR74, ERR75, ERR76, ERR77, ERR78, ERR79,
-       ERR80, ERR81, ERR82, ERR83, ERR84, ERR85, ERR86, ERRCOUNT };
+       ERR80, ERR81, ERR82, ERR83, ERR84, ERR85, ERR86, ERR87, ERRCOUNT };
 
 /* JIT compiling modes. The function list is indexed by them. */
 



---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscribe@tomcat.apache.org
For additional commands, e-mail: dev-help@tomcat.apache.org