You are viewing a plain text version of this content. The canonical link for it is here.
Posted to oro-dev@jakarta.apache.org by df...@locus.apache.org on 2000/09/15 07:22:37 UTC

cvs commit: jakarta-oro/src/java/org/apache/oro/text/regex Perl5Matcher.java

dfs         00/09/14 22:22:37

  Modified:    src/java/org/apache/oro/text/regex Perl5Matcher.java
  Log:
  Jeff ? (jlb@houseofdistraction.com) and Peter Kronenberg
  (pkronenberg@predictivetechnologies.com) identified a bug in the
  behavior of PatternMatcherInput matching methods with respect to
  anchors (^).  Matches were always performed treating either offset 0
  or the current offset as the beginning of the string, rather than
  beginOffset.  Essentially, the beginning of the string for purposes
  of matching ^ wasn't being associated with beginOffset.  This change
  fixes the problem by passing beginOffset and the current offset to
  the interpreter as separate arguments.
  
  Revision  Changes    Path
  1.4       +23 -14    jakarta-oro/src/java/org/apache/oro/text/regex/Perl5Matcher.java
  
  Index: Perl5Matcher.java
  ===================================================================
  RCS file: /home/cvs/jakarta-oro/src/java/org/apache/oro/text/regex/Perl5Matcher.java,v
  retrieving revision 1.3
  retrieving revision 1.4
  diff -u -r1.3 -r1.4
  --- Perl5Matcher.java	2000/09/02 06:28:34	1.3
  +++ Perl5Matcher.java	2000/09/15 05:22:36	1.4
  @@ -66,7 +66,7 @@
    * Perl5Compiler.
   
    @author <a href="mailto:dfs@savarese.org">Daniel F. Savarese</a>
  - @version $Id: Perl5Matcher.java,v 1.3 2000/09/02 06:28:34 jon Exp $
  + @version $Id: Perl5Matcher.java,v 1.4 2000/09/15 05:22:36 dfs Exp $
   
    * @see PatternMatcher
    * @see Perl5Compiler
  @@ -191,7 +191,9 @@
   
     // Initialize globals needed before calling __tryExpression for first time
     private void __initInterpreterGlobals(Perl5Pattern expression, char[] input,
  -					int beginOffset, int endOffset) {
  +					int beginOffset, int endOffset,
  +					int currentOffset)
  +  {
       __input                      = input;
       __endOffset                  = endOffset;
       __currentRep                 = new Perl5Repetition();
  @@ -200,16 +202,20 @@
       __program                    = expression._program;
       __stack.setSize(0);
   
  -    if(beginOffset == 0)
  +    // currentOffset should always be >= beginOffset and should
  +    // always be equal to zero when beginOffset equals 0, but we
  +    // make a weak attempt to protect against a violation of this
  +    // precondition
  +    if(currentOffset == beginOffset || currentOffset <= 0)
         __previousChar = '\n';
       else {
  -      __previousChar = input[beginOffset - 1];
  +      __previousChar = input[currentOffset - 1];
         if(!__multiline && __previousChar == '\n')
   	__previousChar = '\0';
       }
   
       __numParentheses    = expression._numParentheses;
  -    __currentOffset     = beginOffset;
  +    __currentOffset     = currentOffset;
   
       __bol = beginOffset;
       __eol = endOffset;
  @@ -276,13 +282,15 @@
     // __originalInput must be set before calling this method for
     // __lastMatchResult to be set correctly.
     private boolean __interpret(Perl5Pattern expression, char[] input,
  -			      int beginOffset, int endOffset)
  +			      int beginOffset, int endOffset,
  +			      int currentOffset)
     {
       boolean success;
       int minLength = 0, dontTry = 0, offset;
       char ch, mustString[];
   
  -    __initInterpreterGlobals(expression, input, beginOffset, endOffset);
  +    __initInterpreterGlobals(expression, input, beginOffset, endOffset,
  +			     currentOffset);
   
       success = false;
       mustString = expression._mustString;
  @@ -1244,7 +1252,7 @@
       if(expression._isCaseInsensitive)
         input = _toLower(input);
   
  -    __initInterpreterGlobals(expression, input, offset, input.length);
  +    __initInterpreterGlobals(expression, input, 0, input.length, offset);
   
       __lastSuccess = __tryExpression(expression, offset);
       __lastMatchResult = null;
  @@ -1322,8 +1330,8 @@
       } else
         inp = __originalInput;
   
  -    __initInterpreterGlobals(expression, inp, input._currentOffset,
  -			    input._endOffset);
  +    __initInterpreterGlobals(expression, inp, input._beginOffset,
  +			     input._endOffset, input._currentOffset);
       __lastSuccess = __tryExpression(expression, input._currentOffset);
       __lastMatchResult = null;
   
  @@ -1384,7 +1392,7 @@
   	return true;
       }
       */
  -    __initInterpreterGlobals(expression, input, 0, input.length);
  +    __initInterpreterGlobals(expression, input, 0, input.length, 0);
       __lastSuccess = (__tryExpression(expression, 0) &&
   		     __endMatchOffsets[0] == input.length);
       __lastMatchResult = null;
  @@ -1514,7 +1522,7 @@
       */
   
       __initInterpreterGlobals(expression, inp, input._beginOffset,
  -			    input._endOffset);
  +			    input._endOffset, input._beginOffset);
   
       __lastMatchResult = null;
   
  @@ -1609,7 +1617,7 @@
       if(expression._isCaseInsensitive)
         input = _toLower(input);
   
  -    return __interpret(expression, input, 0, input.length);
  +    return __interpret(expression, input, 0, input.length, 0);
     }
   
   
  @@ -1713,7 +1721,8 @@
       __lastMatchInputEndOffset = input.getMatchEndOffset();
   
       matchFound =
  -      __interpret(expression, inp, input._currentOffset, input._endOffset);
  +      __interpret(expression, inp, input._beginOffset, input._endOffset,
  +		  input._currentOffset);
   
       if(matchFound) {
         input.setCurrentOffset(__endMatchOffsets[0]);