You are viewing a plain text version of this content. The canonical link for it is here.
Posted to oro-dev@jakarta.apache.org by df...@apache.org on 2001/07/10 14:39:24 UTC

cvs commit: jakarta-oro/src/java/org/apache/oro/text/awk AwkMatcher.java

dfs         01/07/10 05:39:23

  Modified:    src/java/org/apache/oro/text/awk AwkMatcher.java
  Log:
  Fixed the following bug reported by larrybar@eng.auburn.edu:
  
    Using PatternMatcherInput(String input, int begin, int length) with Awk
    requires  length = length_of_substring + begin  instead of
    length_of_substring as the documentation indicates. Also, MatchResult
    beginOffset(int) and endOffset(int) return offsets from  -begin instead
    of zero. There is no problem with Perl5.
  
  The fix is klugey and indicative of a need to redesign and reimplement
  the AwkMatcher input representation and traversal system.
  
  PR: 1884
  
  Revision  Changes    Path
  1.6       +45 -31    jakarta-oro/src/java/org/apache/oro/text/awk/AwkMatcher.java
  
  Index: AwkMatcher.java
  ===================================================================
  RCS file: /home/cvs/jakarta-oro/src/java/org/apache/oro/text/awk/AwkMatcher.java,v
  retrieving revision 1.5
  retrieving revision 1.6
  diff -u -r1.5 -r1.6
  --- AwkMatcher.java	2001/05/20 23:55:21	1.5
  +++ AwkMatcher.java	2001/07/10 12:39:18	1.6
  @@ -58,7 +58,7 @@
    */
   
   /*
  - * $Id: AwkMatcher.java,v 1.5 2001/05/20 23:55:21 dfs Exp $
  + * $Id: AwkMatcher.java,v 1.6 2001/07/10 12:39:18 dfs Exp $
    */
   import java.io.*;
   
  @@ -90,6 +90,15 @@
     private AwkPattern __awkPattern;
     private int __offsets[] = new int[2];
   
  +  /**
  +   * A kluge variable to make PatternMatcherInput matches work when
  +   * their begin offset is non-zero.  This kluge is caused by the
  +   * misguided notion that AwkStreamInput could be overloaded to do
  +   * both stream and fixed buffer matches.  The whole input representation
  +   * scheme has to be scrapped and redone. -- dfs 2001/07/10
  +   */
  +  private int __beginOffset;
  +
     public AwkMatcher() {
       __scratchBuffer = new AwkStreamInput();
       __scratchBuffer._endOfStreamReached = true;
  @@ -120,7 +129,7 @@
   
       __scratchBuffer._buffer       = input;
       __scratchBuffer._bufferSize   = input.length;
  -    __scratchBuffer._bufferOffset = 0;
  +    __scratchBuffer._bufferOffset = __beginOffset = 0;
       __scratchBuffer._endOfStreamReached = true;
       __streamSearchBuffer = __scratchBuffer;
       __offsets[0] = offset;
  @@ -203,7 +212,7 @@
   
       __awkPattern = (AwkPattern)pattern;
       __scratchBuffer._buffer       = input.getBuffer();
  -    __scratchBuffer._bufferOffset = input.getBeginOffset();
  +    __scratchBuffer._bufferOffset = __beginOffset = input.getBeginOffset();
       __offsets[0] = input.getCurrentOffset();
   
       __scratchBuffer._bufferSize   = input.length();
  @@ -254,7 +263,7 @@
       __awkPattern = (AwkPattern)pattern;
       __scratchBuffer._buffer       = input;
       __scratchBuffer._bufferSize   = input.length;
  -    __scratchBuffer._bufferOffset = 0;
  +    __scratchBuffer._bufferOffset = __beginOffset = 0;
       __scratchBuffer._endOfStreamReached = true;
       __streamSearchBuffer = __scratchBuffer;
       __offsets[0] = 0;
  @@ -331,7 +340,7 @@
       __awkPattern = (AwkPattern)pattern;
       __scratchBuffer._buffer       = input.getBuffer();
       __scratchBuffer._bufferSize   = input.length();
  -    __scratchBuffer._bufferOffset = input.getBeginOffset();
  +    __scratchBuffer._bufferOffset = __beginOffset = input.getBeginOffset();
       __offsets[0] = input.getBeginOffset();
       __scratchBuffer._endOfStreamReached = true;
       __streamSearchBuffer = __scratchBuffer;
  @@ -391,7 +400,7 @@
   
       __scratchBuffer._buffer       = input;
       __scratchBuffer._bufferSize   = input.length;
  -    __scratchBuffer._bufferOffset = 0;
  +    __scratchBuffer._bufferOffset = __beginOffset = 0;
       __scratchBuffer._endOfStreamReached = true;
       __streamSearchBuffer = __scratchBuffer;
       __lastMatchedBufferOffset = 0;
  @@ -499,23 +508,20 @@
     public boolean contains(PatternMatcherInput input, Pattern pattern) {
       __awkPattern = (AwkPattern)pattern;
       __scratchBuffer._buffer       = input.getBuffer();
  -    __scratchBuffer._bufferOffset = input.getBeginOffset();
  +    __scratchBuffer._bufferOffset = __beginOffset = input.getBeginOffset();
       __lastMatchedBufferOffset = input.getCurrentOffset();
   
       // Begin anchor requires match occur at beginning of input
       // No need to adjust current offset if no match found.
       if(__awkPattern._hasBeginAnchor) {
  -      int begin;
  -
  -      begin = input.getBeginOffset();
  -      if(begin != __lastMatchedBufferOffset ||
  -	 !__awkPattern._fastMap[__scratchBuffer._buffer[begin]]) {
  +      if(__beginOffset != __lastMatchedBufferOffset ||
  +	 !__awkPattern._fastMap[__scratchBuffer._buffer[__beginOffset]]) {
   	__lastMatchResult = null;
   	return false;
         }
       }
   
  -    __scratchBuffer._bufferSize   = input.length();
  +    __scratchBuffer._bufferSize = input.length();
       __scratchBuffer._endOfStreamReached = true;
       __streamSearchBuffer = __scratchBuffer;
       try {
  @@ -612,6 +618,7 @@
   
       __lastMatchedBufferOffset = input._currentOffset;
       __streamSearchBuffer = input;
  +    __beginOffset = 0;
       _search();
       input._currentOffset = __lastMatchedBufferOffset;
       return (__lastMatchResult != null);
  @@ -620,13 +627,15 @@
   
     private int __streamMatchPrefix() throws IOException {
       int token, current = AwkPattern._START_STATE, lastState, transition;
  -    int offset, initialOffset;
  +    int offset, initialOffset, maxOffset;
       int lastMatchedOffset = -1;
       int[] tstateArray;
   
       offset = initialOffset = __offsets[0];
  +    maxOffset = __streamSearchBuffer._bufferSize + __beginOffset;
  +
     test:
  -    while(offset < __streamSearchBuffer._bufferSize) {
  +    while(offset < maxOffset) {
         token = __streamSearchBuffer._buffer[offset++];
   
         if(current < __awkPattern._numStates) {
  @@ -638,25 +647,27 @@
   	  __awkPattern._createNewState(lastState, token, tstateArray);
   	  current = tstateArray[token];
   	}
  +
   	if(current == AwkPattern._INVALID_STATE){
   	  break test;
  -	}
  -	else if(__awkPattern._endStates.get(current)){
  +	} else if(__awkPattern._endStates.get(current)){
   	  lastMatchedOffset = offset;
   	}
  -	if(offset == __streamSearchBuffer._bufferSize){
  -	  offset = __streamSearchBuffer._reallocate(initialOffset);
   
  +	if(offset == maxOffset){
  +	  offset =
  +	    __streamSearchBuffer._reallocate(initialOffset) + __beginOffset;
  +	  
  +	  maxOffset = __streamSearchBuffer._bufferSize + __beginOffset;
  +
   	  // If we're at the end of the stream, don't reset values
  -	  if(offset != __streamSearchBuffer._bufferSize){
  +	  if(offset != maxOffset){
   	    if(lastMatchedOffset != -1)
   	      lastMatchedOffset-=initialOffset;
   	    initialOffset = 0;
   	  }
  -
   	}
  -      }
  -      else
  +      } else
   	break;
       }
   
  @@ -669,7 +680,7 @@
       // End anchor requires match occur at end of input
       if(__awkPattern._hasEndAnchor &&
          (!__streamSearchBuffer._endOfStreamReached ||
  -	lastMatchedOffset < __streamSearchBuffer._bufferSize))
  +	lastMatchedOffset < __streamSearchBuffer._bufferSize + __beginOffset))
         return -1;
   
       return (lastMatchedOffset - initialOffset);
  @@ -685,7 +696,8 @@
       __lastMatchResult = null;
   
       while(true){
  -      if(__lastMatchedBufferOffset >= __streamSearchBuffer._bufferSize){
  +      if(__lastMatchedBufferOffset >= 
  +	 __streamSearchBuffer._bufferSize + __beginOffset) {
   	if(__streamSearchBuffer._endOfStreamReached){
   	  // Get rid of reference now that it should no longer be used.
   	  __streamSearchBuffer = null;
  @@ -697,8 +709,8 @@
   	}
         }
   
  -      for(position=__lastMatchedBufferOffset;
  -	  position < __streamSearchBuffer._bufferSize;
  +      for(position = __lastMatchedBufferOffset;
  +	  position < __streamSearchBuffer._bufferSize + __beginOffset;
   	  position = __offsets[0] + 1) {
   
   	__offsets[0] = position;
  @@ -707,16 +719,14 @@
   
   	  __lastMatchResult  = new AwkMatchResult(
   	  new String(__streamSearchBuffer._buffer, __offsets[0],
  -		     tokensMatched),
  -	  __offsets[0] + __streamSearchBuffer._bufferOffset);
  +		     tokensMatched), __offsets[0]);
   
   	  __lastMatchedBufferOffset =
   	    (tokensMatched > 0 ? __offsets[1] + 1 : __offsets[0] + 1);
   
   	  return;
   	} else if(__awkPattern._matchesNullString) {
  -	  __lastMatchResult  = new AwkMatchResult(new String(),
  -			  position + __streamSearchBuffer._bufferOffset);
  +	  __lastMatchResult  = new AwkMatchResult(new String(), position);
   
   	  __lastMatchedBufferOffset = position + 1;
   
  @@ -741,3 +751,7 @@
     public MatchResult getMatch() { return __lastMatchResult; }
   
   }
  +
  +
  +
  +