You are viewing a plain text version of this content. The canonical link for it is here.
Posted to oro-dev@jakarta.apache.org by df...@locus.apache.org on 2000/09/15 07:22:37 UTC
cvs commit: jakarta-oro/src/java/org/apache/oro/text/regex Perl5Matcher.java
dfs 00/09/14 22:22:37
Modified: src/java/org/apache/oro/text/regex Perl5Matcher.java
Log:
Jeff ? (jlb@houseofdistraction.com) and Peter Kronenberg
(pkronenberg@predictivetechnologies.com) identified a bug in the
behavior of PatternMatcherInput matching methods with respect to
anchors (^). Matches were always performed treating either offset 0
or the current offset as the beginning of the string, rather than
beginOffset. Essentially, the beginning of the string for purposes
of matching ^ wasn't being associated with beginOffset.
This change fixes the problem.
Revision Changes Path
1.4 +23 -14 jakarta-oro/src/java/org/apache/oro/text/regex/Perl5Matcher.java
Index: Perl5Matcher.java
===================================================================
RCS file: /home/cvs/jakarta-oro/src/java/org/apache/oro/text/regex/Perl5Matcher.java,v
retrieving revision 1.3
retrieving revision 1.4
diff -u -r1.3 -r1.4
--- Perl5Matcher.java 2000/09/02 06:28:34 1.3
+++ Perl5Matcher.java 2000/09/15 05:22:36 1.4
@@ -66,7 +66,7 @@
* Perl5Compiler.
@author <a href="mailto:dfs@savarese.org">Daniel F. Savarese</a>
- @version $Id: Perl5Matcher.java,v 1.3 2000/09/02 06:28:34 jon Exp $
+ @version $Id: Perl5Matcher.java,v 1.4 2000/09/15 05:22:36 dfs Exp $
* @see PatternMatcher
* @see Perl5Compiler
@@ -191,7 +191,9 @@
// Initialize globals needed before calling __tryExpression for first time
private void __initInterpreterGlobals(Perl5Pattern expression, char[] input,
- int beginOffset, int endOffset) {
+ int beginOffset, int endOffset,
+ int currentOffset)
+ {
__input = input;
__endOffset = endOffset;
__currentRep = new Perl5Repetition();
@@ -200,16 +202,20 @@
__program = expression._program;
__stack.setSize(0);
- if(beginOffset == 0)
+ // currentOffset should always be >= beginOffset and should
+ // always be equal to zero when beginOffset equals 0, but we
+ // make a weak attempt to protect against a violation of this
+ // precondition
+ if(currentOffset == beginOffset || currentOffset <= 0)
__previousChar = '\n';
else {
- __previousChar = input[beginOffset - 1];
+ __previousChar = input[currentOffset - 1];
if(!__multiline && __previousChar == '\n')
__previousChar = '\0';
}
__numParentheses = expression._numParentheses;
- __currentOffset = beginOffset;
+ __currentOffset = currentOffset;
__bol = beginOffset;
__eol = endOffset;
@@ -276,13 +282,15 @@
// __originalInput must be set before calling this method for
// __lastMatchResult to be set correctly.
private boolean __interpret(Perl5Pattern expression, char[] input,
- int beginOffset, int endOffset)
+ int beginOffset, int endOffset,
+ int currentOffset)
{
boolean success;
int minLength = 0, dontTry = 0, offset;
char ch, mustString[];
- __initInterpreterGlobals(expression, input, beginOffset, endOffset);
+ __initInterpreterGlobals(expression, input, beginOffset, endOffset,
+ currentOffset);
success = false;
mustString = expression._mustString;
@@ -1244,7 +1252,7 @@
if(expression._isCaseInsensitive)
input = _toLower(input);
- __initInterpreterGlobals(expression, input, offset, input.length);
+ __initInterpreterGlobals(expression, input, 0, input.length, offset);
__lastSuccess = __tryExpression(expression, offset);
__lastMatchResult = null;
@@ -1322,8 +1330,8 @@
} else
inp = __originalInput;
- __initInterpreterGlobals(expression, inp, input._currentOffset,
- input._endOffset);
+ __initInterpreterGlobals(expression, inp, input._beginOffset,
+ input._endOffset, input._currentOffset);
__lastSuccess = __tryExpression(expression, input._currentOffset);
__lastMatchResult = null;
@@ -1384,7 +1392,7 @@
return true;
}
*/
- __initInterpreterGlobals(expression, input, 0, input.length);
+ __initInterpreterGlobals(expression, input, 0, input.length, 0);
__lastSuccess = (__tryExpression(expression, 0) &&
__endMatchOffsets[0] == input.length);
__lastMatchResult = null;
@@ -1514,7 +1522,7 @@
*/
__initInterpreterGlobals(expression, inp, input._beginOffset,
- input._endOffset);
+ input._endOffset, input._beginOffset);
__lastMatchResult = null;
@@ -1609,7 +1617,7 @@
if(expression._isCaseInsensitive)
input = _toLower(input);
- return __interpret(expression, input, 0, input.length);
+ return __interpret(expression, input, 0, input.length, 0);
}
@@ -1713,7 +1721,8 @@
__lastMatchInputEndOffset = input.getMatchEndOffset();
matchFound =
- __interpret(expression, inp, input._currentOffset, input._endOffset);
+ __interpret(expression, inp, input._beginOffset, input._endOffset,
+ input._currentOffset);
if(matchFound) {
input.setCurrentOffset(__endMatchOffsets[0]);