You are viewing a plain text version of this content. The canonical link for it is here.
Posted to oro-dev@jakarta.apache.org by df...@apache.org on 2001/07/10 14:39:24 UTC
cvs commit: jakarta-oro/src/java/org/apache/oro/text/awk AwkMatcher.java
dfs 01/07/10 05:39:23
Modified: src/java/org/apache/oro/text/awk AwkMatcher.java
Log:
Fixed the following bug reported by larrybar@eng.auburn.edu:
Using PatternMatcherInput(String input, int begin, int length) with Awk
requires length = length_of_substring + begin, rather than the
length_of_substring that the documentation indicates. Also, MatchResult
beginOffset(int) and endOffset(int) return offsets from -begin instead
of zero. The problem does not occur with Perl5.
The fix is a kluge and indicates the need to redesign and reimplement
the AwkMatcher input representation and traversal system.
PR: 1884
Revision Changes Path
1.6 +45 -31 jakarta-oro/src/java/org/apache/oro/text/awk/AwkMatcher.java
Index: AwkMatcher.java
===================================================================
RCS file: /home/cvs/jakarta-oro/src/java/org/apache/oro/text/awk/AwkMatcher.java,v
retrieving revision 1.5
retrieving revision 1.6
diff -u -r1.5 -r1.6
--- AwkMatcher.java 2001/05/20 23:55:21 1.5
+++ AwkMatcher.java 2001/07/10 12:39:18 1.6
@@ -58,7 +58,7 @@
*/
/*
- * $Id: AwkMatcher.java,v 1.5 2001/05/20 23:55:21 dfs Exp $
+ * $Id: AwkMatcher.java,v 1.6 2001/07/10 12:39:18 dfs Exp $
*/
import java.io.*;
@@ -90,6 +90,15 @@
private AwkPattern __awkPattern;
private int __offsets[] = new int[2];
+ /**
+ * A kluge variable to make PatternMatcherInput matches work when
+ * their begin offset is non-zero. This kluge is caused by the
+ * misguided notion that AwkStreamInput could be overloaded to do
+ * both stream and fixed buffer matches. The whole input representation
+ * scheme has to be scrapped and redone. -- dfs 2001/07/10
+ */
+ private int __beginOffset;
+
public AwkMatcher() {
__scratchBuffer = new AwkStreamInput();
__scratchBuffer._endOfStreamReached = true;
@@ -120,7 +129,7 @@
__scratchBuffer._buffer = input;
__scratchBuffer._bufferSize = input.length;
- __scratchBuffer._bufferOffset = 0;
+ __scratchBuffer._bufferOffset = __beginOffset = 0;
__scratchBuffer._endOfStreamReached = true;
__streamSearchBuffer = __scratchBuffer;
__offsets[0] = offset;
@@ -203,7 +212,7 @@
__awkPattern = (AwkPattern)pattern;
__scratchBuffer._buffer = input.getBuffer();
- __scratchBuffer._bufferOffset = input.getBeginOffset();
+ __scratchBuffer._bufferOffset = __beginOffset = input.getBeginOffset();
__offsets[0] = input.getCurrentOffset();
__scratchBuffer._bufferSize = input.length();
@@ -254,7 +263,7 @@
__awkPattern = (AwkPattern)pattern;
__scratchBuffer._buffer = input;
__scratchBuffer._bufferSize = input.length;
- __scratchBuffer._bufferOffset = 0;
+ __scratchBuffer._bufferOffset = __beginOffset = 0;
__scratchBuffer._endOfStreamReached = true;
__streamSearchBuffer = __scratchBuffer;
__offsets[0] = 0;
@@ -331,7 +340,7 @@
__awkPattern = (AwkPattern)pattern;
__scratchBuffer._buffer = input.getBuffer();
__scratchBuffer._bufferSize = input.length();
- __scratchBuffer._bufferOffset = input.getBeginOffset();
+ __scratchBuffer._bufferOffset = __beginOffset = input.getBeginOffset();
__offsets[0] = input.getBeginOffset();
__scratchBuffer._endOfStreamReached = true;
__streamSearchBuffer = __scratchBuffer;
@@ -391,7 +400,7 @@
__scratchBuffer._buffer = input;
__scratchBuffer._bufferSize = input.length;
- __scratchBuffer._bufferOffset = 0;
+ __scratchBuffer._bufferOffset = __beginOffset = 0;
__scratchBuffer._endOfStreamReached = true;
__streamSearchBuffer = __scratchBuffer;
__lastMatchedBufferOffset = 0;
@@ -499,23 +508,20 @@
public boolean contains(PatternMatcherInput input, Pattern pattern) {
__awkPattern = (AwkPattern)pattern;
__scratchBuffer._buffer = input.getBuffer();
- __scratchBuffer._bufferOffset = input.getBeginOffset();
+ __scratchBuffer._bufferOffset = __beginOffset = input.getBeginOffset();
__lastMatchedBufferOffset = input.getCurrentOffset();
// Begin anchor requires match occur at beginning of input
// No need to adjust current offset if no match found.
if(__awkPattern._hasBeginAnchor) {
- int begin;
-
- begin = input.getBeginOffset();
- if(begin != __lastMatchedBufferOffset ||
- !__awkPattern._fastMap[__scratchBuffer._buffer[begin]]) {
+ if(__beginOffset != __lastMatchedBufferOffset ||
+ !__awkPattern._fastMap[__scratchBuffer._buffer[__beginOffset]]) {
__lastMatchResult = null;
return false;
}
}
- __scratchBuffer._bufferSize = input.length();
+ __scratchBuffer._bufferSize = input.length();
__scratchBuffer._endOfStreamReached = true;
__streamSearchBuffer = __scratchBuffer;
try {
@@ -612,6 +618,7 @@
__lastMatchedBufferOffset = input._currentOffset;
__streamSearchBuffer = input;
+ __beginOffset = 0;
_search();
input._currentOffset = __lastMatchedBufferOffset;
return (__lastMatchResult != null);
@@ -620,13 +627,15 @@
private int __streamMatchPrefix() throws IOException {
int token, current = AwkPattern._START_STATE, lastState, transition;
- int offset, initialOffset;
+ int offset, initialOffset, maxOffset;
int lastMatchedOffset = -1;
int[] tstateArray;
offset = initialOffset = __offsets[0];
+ maxOffset = __streamSearchBuffer._bufferSize + __beginOffset;
+
test:
- while(offset < __streamSearchBuffer._bufferSize) {
+ while(offset < maxOffset) {
token = __streamSearchBuffer._buffer[offset++];
if(current < __awkPattern._numStates) {
@@ -638,25 +647,27 @@
__awkPattern._createNewState(lastState, token, tstateArray);
current = tstateArray[token];
}
+
if(current == AwkPattern._INVALID_STATE){
break test;
- }
- else if(__awkPattern._endStates.get(current)){
+ } else if(__awkPattern._endStates.get(current)){
lastMatchedOffset = offset;
}
- if(offset == __streamSearchBuffer._bufferSize){
- offset = __streamSearchBuffer._reallocate(initialOffset);
+ if(offset == maxOffset){
+ offset =
+ __streamSearchBuffer._reallocate(initialOffset) + __beginOffset;
+
+ maxOffset = __streamSearchBuffer._bufferSize + __beginOffset;
+
// If we're at the end of the stream, don't reset values
- if(offset != __streamSearchBuffer._bufferSize){
+ if(offset != maxOffset){
if(lastMatchedOffset != -1)
lastMatchedOffset-=initialOffset;
initialOffset = 0;
}
-
}
- }
- else
+ } else
break;
}
@@ -669,7 +680,7 @@
// End anchor requires match occur at end of input
if(__awkPattern._hasEndAnchor &&
(!__streamSearchBuffer._endOfStreamReached ||
- lastMatchedOffset < __streamSearchBuffer._bufferSize))
+ lastMatchedOffset < __streamSearchBuffer._bufferSize + __beginOffset))
return -1;
return (lastMatchedOffset - initialOffset);
@@ -685,7 +696,8 @@
__lastMatchResult = null;
while(true){
- if(__lastMatchedBufferOffset >= __streamSearchBuffer._bufferSize){
+ if(__lastMatchedBufferOffset >=
+ __streamSearchBuffer._bufferSize + __beginOffset) {
if(__streamSearchBuffer._endOfStreamReached){
// Get rid of reference now that it should no longer be used.
__streamSearchBuffer = null;
@@ -697,8 +709,8 @@
}
}
- for(position=__lastMatchedBufferOffset;
- position < __streamSearchBuffer._bufferSize;
+ for(position = __lastMatchedBufferOffset;
+ position < __streamSearchBuffer._bufferSize + __beginOffset;
position = __offsets[0] + 1) {
__offsets[0] = position;
@@ -707,16 +719,14 @@
__lastMatchResult = new AwkMatchResult(
new String(__streamSearchBuffer._buffer, __offsets[0],
- tokensMatched),
- __offsets[0] + __streamSearchBuffer._bufferOffset);
+ tokensMatched), __offsets[0]);
__lastMatchedBufferOffset =
(tokensMatched > 0 ? __offsets[1] + 1 : __offsets[0] + 1);
return;
} else if(__awkPattern._matchesNullString) {
- __lastMatchResult = new AwkMatchResult(new String(),
- position + __streamSearchBuffer._bufferOffset);
+ __lastMatchResult = new AwkMatchResult(new String(), position);
__lastMatchedBufferOffset = position + 1;
@@ -741,3 +751,7 @@
public MatchResult getMatch() { return __lastMatchResult; }
}
+
+
+
+