You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@xerces.apache.org by kn...@apache.org on 2010/05/19 18:18:36 UTC
svn commit: r946259 [2/2] -
/xerces/java/branches/xml-schema-1.1-dev/src/org/apache/xerces/impl/xpath/regex/RegularExpression.java
Modified: xerces/java/branches/xml-schema-1.1-dev/src/org/apache/xerces/impl/xpath/regex/RegularExpression.java
URL: http://svn.apache.org/viewvc/xerces/java/branches/xml-schema-1.1-dev/src/org/apache/xerces/impl/xpath/regex/RegularExpression.java?rev=946259&r1=946258&r2=946259&view=diff
==============================================================================
--- xerces/java/branches/xml-schema-1.1-dev/src/org/apache/xerces/impl/xpath/regex/RegularExpression.java (original)
+++ xerces/java/branches/xml-schema-1.1-dev/src/org/apache/xerces/impl/xpath/regex/RegularExpression.java Wed May 19 16:18:35 2010
@@ -19,6 +19,9 @@ package org.apache.xerces.impl.xpath.reg
import java.text.CharacterIterator;
import java.util.Locale;
+import java.util.Stack;
+
+import org.apache.xerces.util.IntStack;
/**
* A regular expression matching engine using Non-deterministic Finite Automaton (NFA).
@@ -574,10 +577,7 @@ public class RegularExpression implement
if (tok.type == Token.NONGREEDYCLOSURE) {
op = Op.createNonGreedyClosure();
} else { // Token.CLOSURE
- if (child.getMinLength() == 0)
- op = Op.createClosure(this.numberOfClosures++);
- else
- op = Op.createClosure(-1);
+ op = Op.createClosure(this.numberOfClosures++);
}
op.next = next;
op.setChild(compile(child, op, reverse));
@@ -725,7 +725,7 @@ public class RegularExpression implement
con.match = match;
if (RegularExpression.isSet(this.options, XMLSCHEMA_MODE)) {
- int matchEnd = this. matchCharArray (con, this.operations, con.start, 1, this.options);
+ int matchEnd = this. match(con, this.operations, con.start, 1, this.options);
//System.err.println("DEBUG: matchEnd="+matchEnd);
if (matchEnd == con.limit) {
if (con.match != null) {
@@ -782,7 +782,7 @@ public class RegularExpression implement
&& this.operations.type == Op.CLOSURE && this.operations.getChild().type == Op.DOT) {
if (isSet(this.options, SINGLE_LINE)) {
matchStart = con.start;
- matchEnd = this. matchCharArray (con, this.operations, con.start, 1, this.options);
+ matchEnd = this. match(con, this.operations, con.start, 1, this.options);
} else {
boolean previousIsEOL = true;
for (matchStart = con.start; matchStart <= limit; matchStart ++) {
@@ -791,8 +791,8 @@ public class RegularExpression implement
previousIsEOL = true;
} else {
if (previousIsEOL) {
- if (0 <= (matchEnd = this. matchCharArray (con, this.operations,
- matchStart, 1, this.options)))
+ if (0 <= (matchEnd = this. match(con, this.operations,
+ matchStart, 1, this.options)))
break;
}
previousIsEOL = false;
@@ -815,8 +815,8 @@ public class RegularExpression implement
if (!range.match(ch)) {
continue;
}
- if (0 <= (matchEnd = this. matchCharArray (con, this.operations,
- matchStart, 1, this.options))) {
+ if (0 <= (matchEnd = this. match(con, this.operations,
+ matchStart, 1, this.options))) {
break;
}
}
@@ -827,7 +827,7 @@ public class RegularExpression implement
*/
else {
for (matchStart = con.start; matchStart <= limit; matchStart ++) {
- if (0 <= (matchEnd = this. matchCharArray (con, this.operations, matchStart, 1, this.options)))
+ if (0 <= (matchEnd = this. match(con, this.operations, matchStart, 1, this.options)))
break;
}
}
@@ -846,497 +846,6 @@ public class RegularExpression implement
}
/**
- * @return -1 when not match; offset of the end of matched string when match.
- */
- private int matchCharArray (Context con, Op op, int offset, int dx, int opts) {
-
- char[] target = con.charTarget;
-
-
- while (true) {
- if (op == null)
- return isSet(opts, XMLSCHEMA_MODE) && offset != con.limit ? -1 : offset;
- if (offset > con.limit || offset < con.start)
- return -1;
- switch (op.type) {
- case Op.CHAR:
- if (isSet(opts, IGNORE_CASE)) {
- int ch = op.getData();
- if (dx > 0) {
- if (offset >= con.limit || !matchIgnoreCase(ch, target [ offset ] ))
- return -1;
- offset ++;
- } else {
- int o1 = offset-1;
- if (o1 >= con.limit || o1 < 0 || !matchIgnoreCase(ch, target [ o1 ] ))
- return -1;
- offset = o1;
- }
- } else {
- int ch = op.getData();
- if (dx > 0) {
- if (offset >= con.limit || ch != target [ offset ] )
- return -1;
- offset ++;
- } else {
- int o1 = offset-1;
- if (o1 >= con.limit || o1 < 0 || ch != target [ o1 ] )
- return -1;
- offset = o1;
- }
- }
- op = op.next;
- break;
-
- case Op.DOT:
- if (dx > 0) {
- if (offset >= con.limit)
- return -1;
- int ch = target [ offset ] ;
- if (isSet(opts, SINGLE_LINE)) {
- if (REUtil.isHighSurrogate(ch) && offset+1 < con.limit)
- offset ++;
- } else {
- if (REUtil.isHighSurrogate(ch) && offset+1 < con.limit)
- ch = REUtil.composeFromSurrogates(ch, target [ ++offset ] );
- if (isEOLChar(ch))
- return -1;
- }
- offset ++;
- } else {
- int o1 = offset-1;
- if (o1 >= con.limit || o1 < 0)
- return -1;
- int ch = target [ o1 ] ;
- if (isSet(opts, SINGLE_LINE)) {
- if (REUtil.isLowSurrogate(ch) && o1-1 >= 0)
- o1 --;
- } else {
- if (REUtil.isLowSurrogate(ch) && o1-1 >= 0)
- ch = REUtil.composeFromSurrogates( target [ --o1 ] , ch);
- if (!isEOLChar(ch))
- return -1;
- }
- offset = o1;
- }
- op = op.next;
- break;
-
- case Op.RANGE:
- case Op.NRANGE:
- if (dx > 0) {
- if (offset >= con.limit)
- return -1;
- int ch = target [ offset ] ;
- if (REUtil.isHighSurrogate(ch) && offset+1 < con.limit) {
- ch = REUtil.composeFromSurrogates(ch, target[++offset]);
- }
- final RangeToken tok = op.getToken();
- if (!tok.match(ch)) {
- return -1;
- }
- offset ++;
- } else {
- int o1 = offset-1;
- if (o1 >= con.limit || o1 < 0)
- return -1;
- int ch = target [ o1 ] ;
- if (REUtil.isLowSurrogate(ch) && o1-1 >= 0) {
- ch = REUtil.composeFromSurrogates( target [ --o1 ] , ch);
- }
- final RangeToken tok = op.getToken();
- if (!tok.match(ch)) {
- return -1;
- }
- offset = o1;
- }
- op = op.next;
- break;
-
- case Op.ANCHOR:
- boolean go = false;
- switch (op.getData()) {
- case '^':
- if (isSet(opts, MULTIPLE_LINES)) {
- if (!(offset == con.start
- || offset > con.start && offset < con.limit && isEOLChar( target [ offset-1 ] )))
- return -1;
- } else {
- if (offset != con.start)
- return -1;
- }
- break;
-
- case '@': // Internal use only.
- // The @ always matches line beginnings.
- if (!(offset == con.start
- || offset > con.start && isEOLChar( target [ offset-1 ] )))
- return -1;
- break;
-
- case '$':
- if (isSet(opts, MULTIPLE_LINES)) {
- if (!(offset == con.limit
- || offset < con.limit && isEOLChar( target [ offset ] )))
- return -1;
- } else {
- if (!(offset == con.limit
- || offset+1 == con.limit && isEOLChar( target [ offset ] )
- || offset+2 == con.limit && target [ offset ] == CARRIAGE_RETURN
- && target [ offset+1 ] == LINE_FEED))
- return -1;
- }
- break;
-
- case 'A':
- if (offset != con.start) return -1;
- break;
-
- case 'Z':
- if (!(offset == con.limit
- || offset+1 == con.limit && isEOLChar( target [ offset ] )
- || offset+2 == con.limit && target [ offset ] == CARRIAGE_RETURN
- && target [ offset+1 ] == LINE_FEED))
- return -1;
- break;
-
- case 'z':
- if (offset != con.limit) return -1;
- break;
-
- case 'b':
- if (con.length == 0) return -1;
- {
- int after = getWordType(target, con.start, con.limit, offset, opts);
- if (after == WT_IGNORE) return -1;
- int before = getPreviousWordType(target, con.start, con.limit, offset, opts);
- if (after == before) return -1;
- }
- break;
-
- case 'B':
- if (con.length == 0)
- go = true;
- else {
- int after = getWordType(target, con.start, con.limit, offset, opts);
- go = after == WT_IGNORE
- || after == getPreviousWordType(target, con.start, con.limit, offset, opts);
- }
- if (!go) return -1;
- break;
-
- case '<':
- if (con.length == 0 || offset == con.limit) return -1;
- if (getWordType(target, con.start, con.limit, offset, opts) != WT_LETTER
- || getPreviousWordType(target, con.start, con.limit, offset, opts) != WT_OTHER)
- return -1;
- break;
-
- case '>':
- if (con.length == 0 || offset == con.start) return -1;
- if (getWordType(target, con.start, con.limit, offset, opts) != WT_OTHER
- || getPreviousWordType(target, con.start, con.limit, offset, opts) != WT_LETTER)
- return -1;
- break;
- } // switch anchor type
- op = op.next;
- break;
-
- case Op.BACKREFERENCE:
- {
- int refno = op.getData();
- if (refno <= 0 || refno >= this.nofparen)
- throw new RuntimeException("Internal Error: Reference number must be more than zero: "+refno);
- if (con.match.getBeginning(refno) < 0
- || con.match.getEnd(refno) < 0)
- return -1; // ********
- int o2 = con.match.getBeginning(refno);
- int literallen = con.match.getEnd(refno)-o2;
- if (!isSet(opts, IGNORE_CASE)) {
- if (dx > 0) {
- if (!regionMatches(target, offset, con.limit, o2, literallen))
- return -1;
- offset += literallen;
- } else {
- if (!regionMatches(target, offset-literallen, con.limit, o2, literallen))
- return -1;
- offset -= literallen;
- }
- } else {
- if (dx > 0) {
- if (!regionMatchesIgnoreCase(target, offset, con.limit, o2, literallen))
- return -1;
- offset += literallen;
- } else {
- if (!regionMatchesIgnoreCase(target, offset-literallen, con.limit,
- o2, literallen))
- return -1;
- offset -= literallen;
- }
- }
- }
- op = op.next;
- break;
- case Op.STRING:
- {
- String literal = op.getString();
- int literallen = literal.length();
- if (!isSet(opts, IGNORE_CASE)) {
- if (dx > 0) {
- if (!regionMatches(target, offset, con.limit, literal, literallen))
- return -1;
- offset += literallen;
- } else {
- if (!regionMatches(target, offset-literallen, con.limit, literal, literallen))
- return -1;
- offset -= literallen;
- }
- } else {
- if (dx > 0) {
- if (!regionMatchesIgnoreCase(target, offset, con.limit, literal, literallen))
- return -1;
- offset += literallen;
- } else {
- if (!regionMatchesIgnoreCase(target, offset-literallen, con.limit,
- literal, literallen))
- return -1;
- offset -= literallen;
- }
- }
- }
- op = op.next;
- break;
-
- case Op.CLOSURE:
- {
- /*
- * Saves current position to avoid
- * zero-width repeats.
- */
- int id = op.getData();
- if (id >= 0) {
- int previousOffset = con.offsets[id];
- if (previousOffset < 0 || previousOffset != offset) {
- con.offsets[id] = offset;
- } else {
- con.offsets[id] = -1;
- op = op.next;
- break;
- }
- }
-
- int ret = this. matchCharArray (con, op.getChild(), offset, dx, opts);
- if (id >= 0) con.offsets[id] = -1;
- if (ret >= 0) return ret;
- op = op.next;
- }
- break;
-
- case Op.QUESTION:
- {
- int ret = this. matchCharArray (con, op.getChild(), offset, dx, opts);
- if (ret >= 0) return ret;
- op = op.next;
- }
- break;
-
- case Op.NONGREEDYCLOSURE:
- case Op.NONGREEDYQUESTION:
- {
- int ret = this. matchCharArray (con, op.next, offset, dx, opts);
- if (ret >= 0) return ret;
- op = op.getChild();
- }
- break;
-
- case Op.UNION:
- for (int i = 0; i < op.size(); i ++) {
- int ret = this. matchCharArray (con, op.elementAt(i), offset, dx, opts);
- if (DEBUG) {
- System.err.println("UNION: "+i+", ret="+ret);
- }
- if (ret >= 0) return ret;
- }
- return -1;
-
- case Op.CAPTURE:
- int refno = op.getData();
- if (con.match != null && refno > 0) {
- int save = con.match.getBeginning(refno);
- con.match.setBeginning(refno, offset);
- int ret = this. matchCharArray (con, op.next, offset, dx, opts);
- if (ret < 0) con.match.setBeginning(refno, save);
- return ret;
- } else if (con.match != null && refno < 0) {
- int index = -refno;
- int save = con.match.getEnd(index);
- con.match.setEnd(index, offset);
- int ret = this. matchCharArray (con, op.next, offset, dx, opts);
- if (ret < 0) con.match.setEnd(index, save);
- return ret;
- }
- op = op.next;
- break;
-
- case Op.LOOKAHEAD:
- if (0 > this. matchCharArray (con, op.getChild(), offset, 1, opts)) return -1;
- op = op.next;
- break;
- case Op.NEGATIVELOOKAHEAD:
- if (0 <= this. matchCharArray (con, op.getChild(), offset, 1, opts)) return -1;
- op = op.next;
- break;
- case Op.LOOKBEHIND:
- if (0 > this. matchCharArray (con, op.getChild(), offset, -1, opts)) return -1;
- op = op.next;
- break;
- case Op.NEGATIVELOOKBEHIND:
- if (0 <= this. matchCharArray (con, op.getChild(), offset, -1, opts)) return -1;
- op = op.next;
- break;
-
- case Op.INDEPENDENT:
- {
- int ret = this. matchCharArray (con, op.getChild(), offset, dx, opts);
- if (ret < 0) return ret;
- offset = ret;
- op = op.next;
- }
- break;
-
- case Op.MODIFIER:
- {
- int localopts = opts;
- localopts |= op.getData();
- localopts &= ~op.getData2();
- //System.err.println("MODIFIER: "+Integer.toString(opts, 16)+" -> "+Integer.toString(localopts, 16));
- int ret = this. matchCharArray (con, op.getChild(), offset, dx, localopts);
- if (ret < 0) return ret;
- offset = ret;
- op = op.next;
- }
- break;
-
- case Op.CONDITION:
- {
- Op.ConditionOp cop = (Op.ConditionOp)op;
- boolean matchp = false;
- if (cop.refNumber > 0) {
- if (cop.refNumber >= this.nofparen)
- throw new RuntimeException("Internal Error: Reference number must be more than zero: "+cop.refNumber);
- matchp = con.match.getBeginning(cop.refNumber) >= 0
- && con.match.getEnd(cop.refNumber) >= 0;
- } else {
- matchp = 0 <= this. matchCharArray (con, cop.condition, offset, dx, opts);
- }
-
- if (matchp) {
- op = cop.yes;
- } else if (cop.no != null) {
- op = cop.no;
- } else {
- op = cop.next;
- }
- }
- break;
-
- default:
- throw new RuntimeException("Unknown operation type: "+op.type);
- } // switch (op.type)
- } // while
- }
-
- private static final int getPreviousWordType(char[] target, int begin, int end,
- int offset, int opts) {
- int ret = getWordType(target, begin, end, --offset, opts);
- while (ret == WT_IGNORE)
- ret = getWordType(target, begin, end, --offset, opts);
- return ret;
- }
-
- private static final int getWordType(char[] target, int begin, int end,
- int offset, int opts) {
- if (offset < begin || offset >= end) return WT_OTHER;
- return getWordType0( target [ offset ] , opts);
- }
-
-
-
- private static final boolean regionMatches(char[] target, int offset, int limit,
- String part, int partlen) {
- if (offset < 0) return false;
- if (limit-offset < partlen)
- return false;
- int i = 0;
- while (partlen-- > 0) {
- if ( target [ offset++ ] != part.charAt(i++))
- return false;
- }
- return true;
- }
-
- private static final boolean regionMatches(char[] target, int offset, int limit,
- int offset2, int partlen) {
- if (offset < 0) return false;
- if (limit-offset < partlen)
- return false;
- int i = offset2;
- while (partlen-- > 0) {
- if ( target [ offset++ ] != target [ i++ ] )
- return false;
- }
- return true;
- }
-
-/**
- * @see java.lang.String#regionMatches
- */
- private static final boolean regionMatchesIgnoreCase(char[] target, int offset, int limit,
- String part, int partlen) {
- if (offset < 0) return false;
- if (limit-offset < partlen)
- return false;
- int i = 0;
- while (partlen-- > 0) {
- char ch1 = target [ offset++ ] ;
- char ch2 = part.charAt(i++);
- if (ch1 == ch2)
- continue;
- char uch1 = Character.toUpperCase(ch1);
- char uch2 = Character.toUpperCase(ch2);
- if (uch1 == uch2)
- continue;
- if (Character.toLowerCase(uch1) != Character.toLowerCase(uch2))
- return false;
- }
- return true;
- }
-
- private static final boolean regionMatchesIgnoreCase(char[] target, int offset, int limit,
- int offset2, int partlen) {
- if (offset < 0) return false;
- if (limit-offset < partlen)
- return false;
- int i = offset2;
- while (partlen-- > 0) {
- char ch1 = target [ offset++ ] ;
- char ch2 = target [ i++ ] ;
- if (ch1 == ch2)
- continue;
- char uch1 = Character.toUpperCase(ch1);
- char uch2 = Character.toUpperCase(ch2);
- if (uch1 == uch2)
- continue;
- if (Character.toLowerCase(uch1) != Character.toLowerCase(uch2))
- return false;
- }
- return true;
- }
-
-
-
-
- /**
* Checks whether the <var>target</var> text <strong>contains</strong> this pattern or not.
*
* @return true if the target is matched to this regular expression.
@@ -1404,7 +913,7 @@ public class RegularExpression implement
if (DEBUG) {
System.err.println("target string="+target);
}
- int matchEnd = this. matchString (con, this.operations, con.start, 1, this.options);
+ int matchEnd = this. match(con, this.operations, con.start, 1, this.options);
if (DEBUG) {
System.err.println("matchEnd="+matchEnd);
System.err.println("con.limit="+con.limit);
@@ -1464,7 +973,7 @@ public class RegularExpression implement
&& this.operations.type == Op.CLOSURE && this.operations.getChild().type == Op.DOT) {
if (isSet(this.options, SINGLE_LINE)) {
matchStart = con.start;
- matchEnd = this. matchString (con, this.operations, con.start, 1, this.options);
+ matchEnd = this.match(con, this.operations, con.start, 1, this.options);
} else {
boolean previousIsEOL = true;
for (matchStart = con.start; matchStart <= limit; matchStart ++) {
@@ -1473,8 +982,8 @@ public class RegularExpression implement
previousIsEOL = true;
} else {
if (previousIsEOL) {
- if (0 <= (matchEnd = this. matchString (con, this.operations,
- matchStart, 1, this.options)))
+ if (0 <= (matchEnd = this.match(con, this.operations,
+ matchStart, 1, this.options)))
break;
}
previousIsEOL = false;
@@ -1497,10 +1006,10 @@ public class RegularExpression implement
if (!range.match(ch)) {
continue;
}
- if (0 <= (matchEnd = this. matchString (con, this.operations,
- matchStart, 1, this.options))) {
+ if (0 <= (matchEnd = this.match(con, this.operations,
+ matchStart, 1, this.options))) {
break;
- }
+ }
}
}
@@ -1509,7 +1018,7 @@ public class RegularExpression implement
*/
else {
for (matchStart = con.start; matchStart <= limit; matchStart ++) {
- if (0 <= (matchEnd = this. matchString (con, this.operations, matchStart, 1, this.options)))
+ if (0 <= (matchEnd = this.match(con, this.operations, matchStart, 1, this.options)))
break;
}
}
@@ -1530,409 +1039,504 @@ public class RegularExpression implement
/**
* @return -1 when not match; offset of the end of matched string when match.
*/
- private int matchString (Context con, Op op, int offset, int dx, int opts) {
-
-
-
-
- String target = con.strTarget;
-
-
-
-
- while (true) {
- if (op == null)
- return isSet(opts, XMLSCHEMA_MODE) && offset != con.limit ? -1 : offset;
- if (offset > con.limit || offset < con.start)
- return -1;
- switch (op.type) {
- case Op.CHAR:
- if (isSet(opts, IGNORE_CASE)) {
- int ch = op.getData();
- if (dx > 0) {
- if (offset >= con.limit || !matchIgnoreCase(ch, target .charAt( offset ) ))
- return -1;
- offset ++;
- } else {
- int o1 = offset-1;
- if (o1 >= con.limit || o1 < 0 || !matchIgnoreCase(ch, target .charAt( o1 ) ))
- return -1;
- offset = o1;
- }
- } else {
- int ch = op.getData();
- if (dx > 0) {
- if (offset >= con.limit || ch != target .charAt( offset ) )
- return -1;
- offset ++;
- } else {
- int o1 = offset-1;
- if (o1 >= con.limit || o1 < 0 || ch != target .charAt( o1 ) )
- return -1;
- offset = o1;
+ private int match(Context con, Op op, int offset, int dx, int opts) {
+ final ExpressionTarget target = con.target;
+ final Stack opStack = new Stack();
+ final IntStack dataStack = new IntStack();
+ final boolean isSetIgnoreCase = isSet(opts, IGNORE_CASE);
+ int retValue = -1;
+ boolean returned = false;
+
+ for (;;) {
+ if (op == null || offset > con.limit || offset < con.start) {
+ if (op == null) {
+ retValue = isSet(opts, XMLSCHEMA_MODE) && offset != con.limit ? -1 : offset;
+ }
+ else {
+ retValue = -1;
+ }
+ returned = true;
+ }
+ else {
+ retValue = -1;
+ // dx value is either 1 or -1
+ switch (op.type) {
+ case Op.CHAR:
+ {
+ final int o1 = (dx > 0) ? offset : offset -1;
+ if (o1 >= con.limit || o1 < 0 || !matchChar(op.getData(), target.charAt(o1), isSetIgnoreCase)) {
+ returned = true;
+ break;
+ }
+ offset += dx;
+ op = op.next;
}
- }
- op = op.next;
- break;
+ break;
- case Op.DOT:
- if (dx > 0) {
- if (offset >= con.limit)
- return -1;
- int ch = target .charAt( offset ) ;
- if (isSet(opts, SINGLE_LINE)) {
- if (REUtil.isHighSurrogate(ch) && offset+1 < con.limit)
- offset ++;
- } else {
- if (REUtil.isHighSurrogate(ch) && offset+1 < con.limit)
- ch = REUtil.composeFromSurrogates(ch, target .charAt( ++offset ) );
- if (isEOLChar(ch))
- return -1;
- }
- offset ++;
- } else {
- int o1 = offset-1;
- if (o1 >= con.limit || o1 < 0)
- return -1;
- int ch = target .charAt( o1 ) ;
- if (isSet(opts, SINGLE_LINE)) {
- if (REUtil.isLowSurrogate(ch) && o1-1 >= 0)
- o1 --;
- } else {
- if (REUtil.isLowSurrogate(ch) && o1-1 >= 0)
- ch = REUtil.composeFromSurrogates( target .charAt( --o1 ) , ch);
- if (!isEOLChar(ch))
- return -1;
+ case Op.DOT:
+ {
+ int o1 = (dx > 0) ? offset : offset - 1;
+ if (o1 >= con.limit || o1 < 0) {
+ returned = true;
+ break;
+ }
+ if (isSet(opts, SINGLE_LINE)) {
+ if (REUtil.isHighSurrogate(target.charAt(o1)) && o1+dx >= 0 && o1+dx < con.limit) {
+ o1 += dx;
+ }
+ }
+ else {
+ int ch = target.charAt(o1);
+ if (REUtil.isHighSurrogate(ch) && o1+dx >= 0 && o1+dx < con.limit) {
+ o1 += dx;
+ ch = REUtil.composeFromSurrogates(ch, target.charAt(o1));
+ }
+ if (isEOLChar(ch)) {
+ returned = true;
+ break;
+ }
+ }
+ offset = (dx > 0) ? o1 + 1 : o1;
+ op = op.next;
}
- offset = o1;
- }
- op = op.next;
- break;
+ break;
- case Op.RANGE:
- case Op.NRANGE:
- if (dx > 0) {
- if (offset >= con.limit)
- return -1;
- int ch = target .charAt( offset ) ;
- if (REUtil.isHighSurrogate(ch) && offset+1 < con.limit) {
- ch = REUtil.composeFromSurrogates(ch, target.charAt(++offset));
- }
- final RangeToken tok = op.getToken();
- if (!tok.match(ch)) {
- return -1;
- }
- offset ++;
- } else {
- int o1 = offset-1;
- if (o1 >= con.limit || o1 < 0)
- return -1;
- int ch = target .charAt( o1 ) ;
- if (REUtil.isLowSurrogate(ch) && o1-1 >= 0) {
- ch = REUtil.composeFromSurrogates( target .charAt( --o1 ) , ch);
- }
- final RangeToken tok = op.getToken();
- if (!tok.match(ch)) {
- return -1;
+ case Op.RANGE:
+ case Op.NRANGE:
+ {
+ int o1 = (dx > 0) ? offset : offset -1;
+ if (o1 >= con.limit || o1 < 0) {
+ returned = true;
+ break;
+ }
+ int ch = target.charAt(offset);
+ if (REUtil.isHighSurrogate(ch) && o1+dx < con.limit && o1+dx >=0) {
+ o1 += dx;
+ ch = REUtil.composeFromSurrogates(ch, target.charAt(o1));
+ }
+ final RangeToken tok = op.getToken();
+ if (!tok.match(ch)) {
+ returned = true;
+ break;
+ }
+ offset = (dx > 0) ? o1+1 : o1;
+ op = op.next;
}
- offset = o1;
- }
- op = op.next;
- break;
+ break;
- case Op.ANCHOR:
- boolean go = false;
- switch (op.getData()) {
- case '^':
- if (isSet(opts, MULTIPLE_LINES)) {
- if (!(offset == con.start
- || offset > con.start && offset < con.limit && isEOLChar( target .charAt( offset-1 ) )))
- return -1;
- } else {
- if (offset != con.start)
- return -1;
+ case Op.ANCHOR:
+ {
+ if (!matchAnchor(target, op, con, offset, opts)) {
+ returned = true;
+ break;
+ }
+ op = op.next;
}
break;
- case '@': // Internal use only.
- // The @ always matches line beginnings.
- if (!(offset == con.start
- || offset > con.start && isEOLChar( target .charAt( offset-1 ) )))
- return -1;
+ case Op.BACKREFERENCE:
+ {
+ int refno = op.getData();
+ if (refno <= 0 || refno >= this.nofparen) {
+ throw new RuntimeException("Internal Error: Reference number must be more than zero: "+refno);
+ }
+ if (con.match.getBeginning(refno) < 0 || con.match.getEnd(refno) < 0) {
+ returned = true;
+ break;
+ }
+ int o2 = con.match.getBeginning(refno);
+ int literallen = con.match.getEnd(refno)-o2;
+ if (dx > 0) {
+ if (!target.regionMatches(isSetIgnoreCase, offset, con.limit, o2, literallen)) {
+ returned = true;
+ break;
+ }
+ offset += literallen;
+ }
+ else {
+ if (!target.regionMatches(isSetIgnoreCase, offset-literallen, con.limit, o2, literallen)) {
+ returned = true;
+ break;
+ }
+ offset -= literallen;
+ }
+ op = op.next;
+ }
break;
- case '$':
- if (isSet(opts, MULTIPLE_LINES)) {
- if (!(offset == con.limit
- || offset < con.limit && isEOLChar( target .charAt( offset ) )))
- return -1;
- } else {
- if (!(offset == con.limit
- || offset+1 == con.limit && isEOLChar( target .charAt( offset ) )
- || offset+2 == con.limit && target .charAt( offset ) == CARRIAGE_RETURN
- && target .charAt( offset+1 ) == LINE_FEED))
- return -1;
+ case Op.STRING:
+ {
+ String literal = op.getString();
+ int literallen = literal.length();
+ if (dx > 0) {
+ if (!target.regionMatches(isSetIgnoreCase, offset, con.limit, literal, literallen)) {
+ returned = true;
+ break;
+ }
+ offset += literallen;
+ }
+ else {
+ if (!target.regionMatches(isSetIgnoreCase, offset-literallen, con.limit, literal, literallen)) {
+ returned = true;
+ break;
+ }
+ offset -= literallen;
+ }
+ op = op.next;
}
break;
- case 'A':
- if (offset != con.start) return -1;
+ case Op.CLOSURE:
+ {
+ // Saves current position to avoid zero-width repeats.
+ final int id = op.getData();
+ int previousOffset = con.offsets[id];
+ if (previousOffset == offset) {
+ returned = true;
+ break;
+ }
+ con.offsets[id] = offset;
+ if (offset < previousOffset) {
+ op = op.next;
+ break;
+ }
+ }
+ // fall through
+
+ case Op.QUESTION:
+ {
+ opStack.push(op);
+ dataStack.push(offset);
+ op = op.getChild();
+ }
break;
- case 'Z':
- if (!(offset == con.limit
- || offset+1 == con.limit && isEOLChar( target .charAt( offset ) )
- || offset+2 == con.limit && target .charAt( offset ) == CARRIAGE_RETURN
- && target .charAt( offset+1 ) == LINE_FEED))
- return -1;
+ case Op.NONGREEDYCLOSURE:
+ case Op.NONGREEDYQUESTION:
+ {
+ opStack.push(op);
+ dataStack.push(offset);
+ op = op.next;
+ }
break;
- case 'z':
- if (offset != con.limit) return -1;
+ case Op.UNION:
+ if (op.size() == 0) {
+ returned = true;
+ }
+ else {
+ opStack.push(op);
+ dataStack.push(0);
+ dataStack.push(offset);
+ op = op.elementAt(0);
+ }
break;
- case 'b':
- if (con.length == 0) return -1;
+ case Op.CAPTURE:
{
- int after = getWordType(target, con.start, con.limit, offset, opts);
- if (after == WT_IGNORE) return -1;
- int before = getPreviousWordType(target, con.start, con.limit, offset, opts);
- if (after == before) return -1;
+ final int refno = op.getData();
+ if (con.match != null) {
+ if (refno > 0) {
+ dataStack.push(con.match.getBeginning(refno));
+ con.match.setBeginning(refno, offset);
+ }
+ else {
+ final int index = -refno;
+ dataStack.push(con.match.getEnd(index));
+ con.match.setEnd(index, offset);
+ }
+ opStack.push(op);
+ dataStack.push(offset);
+ }
+ op = op.next;
}
break;
- case 'B':
- if (con.length == 0)
- go = true;
- else {
- int after = getWordType(target, con.start, con.limit, offset, opts);
- go = after == WT_IGNORE
- || after == getPreviousWordType(target, con.start, con.limit, offset, opts);
+ case Op.LOOKAHEAD:
+ case Op.NEGATIVELOOKAHEAD:
+ case Op.LOOKBEHIND:
+ case Op.NEGATIVELOOKBEHIND:
+ {
+ opStack.push(op);
+ dataStack.push(dx);
+ dataStack.push(offset);
+ dx = (op.type == Op.LOOKAHEAD || op.type == Op.NEGATIVELOOKAHEAD) ? 1 : -1;
+ op = op.getChild();
}
- if (!go) return -1;
break;
- case '<':
- if (con.length == 0 || offset == con.limit) return -1;
- if (getWordType(target, con.start, con.limit, offset, opts) != WT_LETTER
- || getPreviousWordType(target, con.start, con.limit, offset, opts) != WT_OTHER)
- return -1;
+ case Op.INDEPENDENT:
+ {
+ opStack.push(op);
+ dataStack.push(offset);
+ op = op.getChild();
+ }
break;
- case '>':
- if (con.length == 0 || offset == con.start) return -1;
- if (getWordType(target, con.start, con.limit, offset, opts) != WT_OTHER
- || getPreviousWordType(target, con.start, con.limit, offset, opts) != WT_LETTER)
- return -1;
+ case Op.MODIFIER:
+ {
+ int localopts = opts;
+ localopts |= op.getData();
+ localopts &= ~op.getData2();
+ opStack.push(op);
+ dataStack.push(opts);
+ dataStack.push(offset);
+ opts = localopts;
+ op = op.getChild();
+ }
break;
- } // switch anchor type
- op = op.next;
- break;
- case Op.BACKREFERENCE:
- {
- int refno = op.getData();
- if (refno <= 0 || refno >= this.nofparen)
- throw new RuntimeException("Internal Error: Reference number must be more than zero: "+refno);
- if (con.match.getBeginning(refno) < 0
- || con.match.getEnd(refno) < 0)
- return -1; // ********
- int o2 = con.match.getBeginning(refno);
- int literallen = con.match.getEnd(refno)-o2;
- if (!isSet(opts, IGNORE_CASE)) {
- if (dx > 0) {
- if (!regionMatches(target, offset, con.limit, o2, literallen))
- return -1;
- offset += literallen;
- } else {
- if (!regionMatches(target, offset-literallen, con.limit, o2, literallen))
- return -1;
- offset -= literallen;
+ case Op.CONDITION:
+ {
+ Op.ConditionOp cop = (Op.ConditionOp)op;
+ if (cop.refNumber > 0) {
+ if (cop.refNumber >= this.nofparen) {
+ throw new RuntimeException("Internal Error: Reference number must be more than zero: "+cop.refNumber);
+ }
+ if (con.match.getBeginning(cop.refNumber) >= 0
+ && con.match.getEnd(cop.refNumber) >= 0) {
+ op = cop.yes;
+ }
+ else if (cop.no != null) {
+ op = cop.no;
+ }
+ else {
+ op = cop.next;
+ }
}
- } else {
- if (dx > 0) {
- if (!regionMatchesIgnoreCase(target, offset, con.limit, o2, literallen))
- return -1;
- offset += literallen;
- } else {
- if (!regionMatchesIgnoreCase(target, offset-literallen, con.limit,
- o2, literallen))
- return -1;
- offset -= literallen;
+ else {
+ opStack.push(op);
+ dataStack.push(offset);
+ op = cop.condition;
}
}
+ break;
+
+ default:
+ throw new RuntimeException("Unknown operation type: " + op.type);
}
- op = op.next;
- break;
- case Op.STRING:
- {
- String literal = op.getString();
- int literallen = literal.length();
- if (!isSet(opts, IGNORE_CASE)) {
- if (dx > 0) {
- if (!regionMatches(target, offset, con.limit, literal, literallen))
- return -1;
- offset += literallen;
- } else {
- if (!regionMatches(target, offset-literallen, con.limit, literal, literallen))
- return -1;
- offset -= literallen;
+ }
+
+ // handle recursive operations
+ while (returned) {
+ // exhausted all the operations
+ if (opStack.isEmpty()) {
+ return retValue;
+ }
+
+ op = (Op) opStack.pop();
+ offset = dataStack.pop();
+
+ switch (op.type) {
+ case Op.CLOSURE:
+ con.offsets[op.getData()] = offset;
+ // fall through - same behavior as Op.Question
+
+ case Op.QUESTION:
+ if (retValue < 0) {
+ op = op.next;
+ returned = false;
+ }
+ break;
+
+ case Op.NONGREEDYCLOSURE:
+ case Op.NONGREEDYQUESTION:
+ if (retValue < 0) {
+ op = op.getChild();
+ returned = false;
+ }
+ break;
+
+ case Op.UNION:
+ {
+ int unionIndex = dataStack.pop();
+ if (DEBUG) {
+ System.err.println("UNION: "+unionIndex+", ret="+retValue);
}
- } else {
- if (dx > 0) {
- if (!regionMatchesIgnoreCase(target, offset, con.limit, literal, literallen))
- return -1;
- offset += literallen;
- } else {
- if (!regionMatchesIgnoreCase(target, offset-literallen, con.limit,
- literal, literallen))
- return -1;
- offset -= literallen;
+
+ if (retValue < 0) {
+ if (++unionIndex < op.size()) {
+ opStack.push(op);
+ dataStack.push(unionIndex);
+ dataStack.push(offset);
+ op = op.elementAt(unionIndex);
+ returned = false;
+ }
+ else {
+ retValue = -1;
+ }
}
}
- }
- op = op.next;
- break;
+ break;
- case Op.CLOSURE:
- {
- /*
- * Saves current position to avoid
- * zero-width repeats.
- */
- int id = op.getData();
- if (id >= 0) {
- int previousOffset = con.offsets[id];
- if (previousOffset < 0 || previousOffset != offset) {
- con.offsets[id] = offset;
- } else {
- con.offsets[id] = -1;
+ case Op.CAPTURE:
+ final int refno = op.getData();
+ final int saved = dataStack.pop();
+ if (retValue < 0) {
+ if (refno > 0) {
+ con.match.setBeginning(refno, saved);
+ }
+ else {
+ con.match.setEnd(-refno, saved);
+ }
+ }
+ break;
+
+ case Op.LOOKAHEAD:
+ case Op.LOOKBEHIND:
+ {
+ dx = dataStack.pop();
+ if (0 <= retValue) {
op = op.next;
- break;
+ returned = false;
}
+ retValue = -1;
}
- int ret = this. matchString (con, op.getChild(), offset, dx, opts);
- if (id >= 0) con.offsets[id] = -1;
- if (ret >= 0) return ret;
- op = op.next;
- }
- break;
+ break;
- case Op.QUESTION:
- {
- int ret = this. matchString (con, op.getChild(), offset, dx, opts);
- if (ret >= 0) return ret;
- op = op.next;
- }
- break;
+ case Op.NEGATIVELOOKAHEAD:
+ case Op.NEGATIVELOOKBEHIND:
+ {
+ dx = dataStack.pop();
+ if (0 > retValue) {
+ op = op.next;
+ returned = false;
+ }
+ retValue = -1;
+ }
+ break;
- case Op.NONGREEDYCLOSURE:
- case Op.NONGREEDYQUESTION:
- {
- int ret = this. matchString (con, op.next, offset, dx, opts);
- if (ret >= 0) return ret;
- op = op.getChild();
- }
- break;
+ case Op.MODIFIER:
+ opts = dataStack.pop();
+ // fall through
- case Op.UNION:
- for (int i = 0; i < op.size(); i ++) {
- int ret = this. matchString (con, op.elementAt(i), offset, dx, opts);
- if (DEBUG) {
- System.err.println("UNION: "+i+", ret="+ret);
- }
- if (ret >= 0) return ret;
- }
- return -1;
-
- case Op.CAPTURE:
- int refno = op.getData();
- if (con.match != null && refno > 0) {
- int save = con.match.getBeginning(refno);
- con.match.setBeginning(refno, offset);
- int ret = this. matchString (con, op.next, offset, dx, opts);
- if (ret < 0) con.match.setBeginning(refno, save);
- return ret;
- } else if (con.match != null && refno < 0) {
- int index = -refno;
- int save = con.match.getEnd(index);
- con.match.setEnd(index, offset);
- int ret = this. matchString (con, op.next, offset, dx, opts);
- if (ret < 0) con.match.setEnd(index, save);
- return ret;
- }
- op = op.next;
- break;
+ case Op.INDEPENDENT:
+ if (retValue >= 0) {
+ offset = retValue;
+ op = op.next;
+ returned = false;
+ }
+ break;
- case Op.LOOKAHEAD:
- if (0 > this. matchString (con, op.getChild(), offset, 1, opts)) return -1;
- op = op.next;
- break;
- case Op.NEGATIVELOOKAHEAD:
- if (0 <= this. matchString (con, op.getChild(), offset, 1, opts)) return -1;
- op = op.next;
- break;
- case Op.LOOKBEHIND:
- if (0 > this. matchString (con, op.getChild(), offset, -1, opts)) return -1;
- op = op.next;
- break;
- case Op.NEGATIVELOOKBEHIND:
- if (0 <= this. matchString (con, op.getChild(), offset, -1, opts)) return -1;
- op = op.next;
- break;
+ case Op.CONDITION:
+ {
+ final Op.ConditionOp cop = (Op.ConditionOp)op;
+ if (0 <= retValue) {
+ op = cop.yes;
+ }
+ else if (cop.no != null) {
+ op = cop.no;
+ }
+ else {
+ op = cop.next;
+ }
+ }
+ returned = false;
+ break;
- case Op.INDEPENDENT:
- {
- int ret = this. matchString (con, op.getChild(), offset, dx, opts);
- if (ret < 0) return ret;
- offset = ret;
- op = op.next;
+ default:
+ break;
}
- break;
+ }
+ }
+ }
- case Op.MODIFIER:
- {
- int localopts = opts;
- localopts |= op.getData();
- localopts &= ~op.getData2();
- //System.err.println("MODIFIER: "+Integer.toString(opts, 16)+" -> "+Integer.toString(localopts, 16));
- int ret = this. matchString (con, op.getChild(), offset, dx, localopts);
- if (ret < 0) return ret;
- offset = ret;
- op = op.next;
- }
- break;
+ private boolean matchChar(int ch, int other, boolean ignoreCase) { // Tests whether two int character values match, optionally ignoring case.
+ return (ignoreCase) ? matchIgnoreCase(ch, other) : ch == other; // case-insensitive path delegates to matchIgnoreCase; otherwise plain equality
+ }
- case Op.CONDITION:
- {
- Op.ConditionOp cop = (Op.ConditionOp)op;
- boolean matchp = false;
- if (cop.refNumber > 0) {
- if (cop.refNumber >= this.nofparen)
- throw new RuntimeException("Internal Error: Reference number must be more than zero: "+cop.refNumber);
- matchp = con.match.getBeginning(cop.refNumber) >= 0
- && con.match.getEnd(cop.refNumber) >= 0;
- } else {
- matchp = 0 <= this. matchString (con, cop.condition, offset, dx, opts);
- }
+ boolean matchAnchor(ExpressionTarget target, Op op, Context con, int offset, int opts) { // Returns true when the zero-width anchor selected by op.getData() holds at offset, false otherwise.
+ boolean go = false; // used only by the 'B' case below
+ switch (op.getData()) {
+ case '^': // with MULTIPLE_LINES: at con.start, or strictly inside the range right after an EOL char; otherwise only at con.start
+ if (isSet(opts, MULTIPLE_LINES)) {
+ if (!(offset == con.start
+ || offset > con.start && offset < con.limit && isEOLChar(target.charAt(offset-1))))
+ return false;
+ } else {
+ if (offset != con.start)
+ return false;
+ }
+ break;
- if (matchp) {
- op = cop.yes;
- } else if (cop.no != null) {
- op = cop.no;
- } else {
- op = cop.next;
- }
- }
- break;
+ case '@': // Internal use only.
+ // The @ always matches line beginnings.
+ if (!(offset == con.start
+ || offset > con.start && isEOLChar(target.charAt(offset-1))))
+ return false;
+ break;
- default:
- throw new RuntimeException("Unknown operation type: "+op.type);
- } // switch (op.type)
- } // while
+ case '$': // with MULTIPLE_LINES: at con.limit or just before an EOL char; otherwise at con.limit or before one final EOL char / CR LF pair
+ if (isSet(opts, MULTIPLE_LINES)) {
+ if (!(offset == con.limit
+ || offset < con.limit && isEOLChar(target.charAt(offset))))
+ return false;
+ } else {
+ if (!(offset == con.limit
+ || offset+1 == con.limit && isEOLChar(target.charAt(offset))
+ || offset+2 == con.limit && target.charAt(offset) == CARRIAGE_RETURN
+ && target.charAt(offset+1) == LINE_FEED))
+ return false;
+ }
+ break;
+
+ case 'A': // only at con.start
+ if (offset != con.start) return false;
+ break;
+
+ case 'Z': // at con.limit, or before a final EOL char or CR LF pair that ends the range
+ if (!(offset == con.limit
+ || offset+1 == con.limit && isEOLChar(target.charAt(offset))
+ || offset+2 == con.limit && target.charAt(offset) == CARRIAGE_RETURN
+ && target.charAt(offset+1) == LINE_FEED))
+ return false;
+ break;
+
+ case 'z': // only at con.limit
+ if (offset != con.limit) return false;
+ break;
+
+ case 'b': // fails when con.length == 0, when the following word type is WT_IGNORE, or when the word types on both sides are equal
+ if (con.length == 0)
+ return false;
+ {
+ int after = getWordType(target, con.start, con.limit, offset, opts);
+ if (after == WT_IGNORE) return false;
+ int before = getPreviousWordType(target, con.start, con.limit, offset, opts);
+ if (after == before) return false;
+ }
+ break;
+
+ case 'B': // holds when con.length == 0, when the following word type is WT_IGNORE, or when the word types on both sides are equal
+ if (con.length == 0)
+ go = true;
+ else {
+ int after = getWordType(target, con.start, con.limit, offset, opts);
+ go = after == WT_IGNORE
+ || after == getPreviousWordType(target, con.start, con.limit, offset, opts);
+ }
+ if (!go) return false;
+ break;
+
+ case '<': // requires WT_LETTER at offset and WT_OTHER before it; never holds at con.limit or when con.length == 0
+ if (con.length == 0 || offset == con.limit) return false;
+ if (getWordType(target, con.start, con.limit, offset, opts) != WT_LETTER
+ || getPreviousWordType(target, con.start, con.limit, offset, opts) != WT_OTHER)
+ return false;
+ break;
+
+ case '>': // requires WT_OTHER at offset and WT_LETTER before it; never holds at con.start or when con.length == 0
+ if (con.length == 0 || offset == con.start) return false;
+ if (getWordType(target, con.start, con.limit, offset, opts) != WT_OTHER
+ || getPreviousWordType(target, con.start, con.limit, offset, opts) != WT_LETTER)
+ return false;
+ break;
+ } // switch anchor type
+
+ return true; // no check rejected the position (anchor types not listed above also end up here)
}
- private static final int getPreviousWordType(String target, int begin, int end,
+ private static final int getPreviousWordType(ExpressionTarget target, int begin, int end,
int offset, int opts) {
int ret = getWordType(target, begin, end, --offset, opts);
while (ret == WT_IGNORE)
@@ -1940,41 +1544,12 @@ public class RegularExpression implement
return ret;
}
- private static final int getWordType(String target, int begin, int end,
+ private static final int getWordType(ExpressionTarget target, int begin, int end, // Classifies the character at offset into a WT_* word type via getWordType0.
int offset, int opts) {
if (offset < begin || offset >= end) return WT_OTHER; // positions outside [begin, end) are reported as WT_OTHER
- return getWordType0( target .charAt( offset ) , opts);
- }
-
-
- private static final boolean regionMatches(String text, int offset, int limit,
- String part, int partlen) {
- if (limit-offset < partlen) return false;
- return text.regionMatches(offset, part, 0, partlen);
- }
-
- private static final boolean regionMatches(String text, int offset, int limit,
- int offset2, int partlen) {
- if (limit-offset < partlen) return false;
- return text.regionMatches(offset, text, offset2, partlen);
+ return getWordType0(target.charAt(offset) , opts); // in-range positions: classification is delegated to getWordType0
}
- private static final boolean regionMatchesIgnoreCase(String text, int offset, int limit,
- String part, int partlen) {
- return text.regionMatches(true, offset, part, 0, partlen);
- }
-
- private static final boolean regionMatchesIgnoreCase(String text, int offset, int limit,
- int offset2, int partlen) {
- if (limit-offset < partlen) return false;
- return text.regionMatches(true, offset, text, offset2, partlen);
- }
-
-
-
-
-
-
/**
* Checks whether the <var>target</var> text <strong>contains</strong> this pattern or not.
@@ -2021,7 +1596,7 @@ public class RegularExpression implement
con.match = match;
if (RegularExpression.isSet(this.options, XMLSCHEMA_MODE)) {
- int matchEnd = this. matchCharacterIterator (con, this.operations, con.start, 1, this.options);
+ int matchEnd = this.match(con, this.operations, con.start, 1, this.options);
//System.err.println("DEBUG: matchEnd="+matchEnd);
if (matchEnd == con.limit) {
if (con.match != null) {
@@ -2078,7 +1653,7 @@ public class RegularExpression implement
&& this.operations.type == Op.CLOSURE && this.operations.getChild().type == Op.DOT) {
if (isSet(this.options, SINGLE_LINE)) {
matchStart = con.start;
- matchEnd = this. matchCharacterIterator (con, this.operations, con.start, 1, this.options);
+ matchEnd = this.match(con, this.operations, con.start, 1, this.options);
} else {
boolean previousIsEOL = true;
for (matchStart = con.start; matchStart <= limit; matchStart ++) {
@@ -2087,8 +1662,8 @@ public class RegularExpression implement
previousIsEOL = true;
} else {
if (previousIsEOL) {
- if (0 <= (matchEnd = this. matchCharacterIterator (con, this.operations,
- matchStart, 1, this.options)))
+ if (0 <= (matchEnd = this.match(con, this.operations,
+ matchStart, 1, this.options)))
break;
}
previousIsEOL = false;
@@ -2111,8 +1686,8 @@ public class RegularExpression implement
if (!range.match(ch)) {
continue;
}
- if (0 <= (matchEnd = this.matchCharacterIterator(con, this.operations,
- matchStart, 1, this.options))) {
+ if (0 <= (matchEnd = this.match(con, this.operations,
+ matchStart, 1, this.options))) {
break;
}
}
@@ -2123,7 +1698,7 @@ public class RegularExpression implement
*/
else {
for (matchStart = con.start; matchStart <= limit; matchStart ++) {
- if (0 <= (matchEnd = this. matchCharacterIterator (con, this.operations, matchStart, 1, this.options)))
+ if (0 <= (matchEnd = this. match(con, this.operations, matchStart, 1, this.options)))
break;
}
}
@@ -2141,547 +1716,281 @@ public class RegularExpression implement
}
}
+ // ================================================================
+
/**
- * @return -1 when not match; offset of the end of matched string when match.
+ * A regular expression.
+ * @serial
*/
- private int matchCharacterIterator (Context con, Op op, int offset, int dx, int opts) {
-
-
- CharacterIterator target = con.ciTarget;
-
-
-
-
-
-
- while (true) {
- if (op == null)
- return isSet(opts, XMLSCHEMA_MODE) && offset != con.limit ? -1 : offset;
- if (offset > con.limit || offset < con.start)
- return -1;
- switch (op.type) {
- case Op.CHAR:
- if (isSet(opts, IGNORE_CASE)) {
- int ch = op.getData();
- if (dx > 0) {
- if (offset >= con.limit || !matchIgnoreCase(ch, target .setIndex( offset ) ))
- return -1;
- offset ++;
- } else {
- int o1 = offset-1;
- if (o1 >= con.limit || o1 < 0 || !matchIgnoreCase(ch, target .setIndex( o1 ) ))
- return -1;
- offset = o1;
- }
- } else {
- int ch = op.getData();
- if (dx > 0) {
- if (offset >= con.limit || ch != target .setIndex( offset ) )
- return -1;
- offset ++;
- } else {
- int o1 = offset-1;
- if (o1 >= con.limit || o1 < 0 || ch != target .setIndex( o1 ) )
- return -1;
- offset = o1;
- }
- }
- op = op.next;
- break;
-
- case Op.DOT:
- if (dx > 0) {
- if (offset >= con.limit)
- return -1;
- int ch = target .setIndex( offset ) ;
- if (isSet(opts, SINGLE_LINE)) {
- if (REUtil.isHighSurrogate(ch) && offset+1 < con.limit)
- offset ++;
- } else {
- if (REUtil.isHighSurrogate(ch) && offset+1 < con.limit)
- ch = REUtil.composeFromSurrogates(ch, target .setIndex( ++offset ) );
- if (isEOLChar(ch))
- return -1;
- }
- offset ++;
- } else {
- int o1 = offset-1;
- if (o1 >= con.limit || o1 < 0)
- return -1;
- int ch = target .setIndex( o1 ) ;
- if (isSet(opts, SINGLE_LINE)) {
- if (REUtil.isLowSurrogate(ch) && o1-1 >= 0)
- o1 --;
- } else {
- if (REUtil.isLowSurrogate(ch) && o1-1 >= 0)
- ch = REUtil.composeFromSurrogates( target .setIndex( --o1 ) , ch);
- if (!isEOLChar(ch))
- return -1;
- }
- offset = o1;
- }
- op = op.next;
- break;
-
- case Op.RANGE:
- case Op.NRANGE:
- if (dx > 0) {
- if (offset >= con.limit)
- return -1;
- int ch = target .setIndex( offset ) ;
- if (REUtil.isHighSurrogate(ch) && offset+1 < con.limit)
- ch = REUtil.composeFromSurrogates(ch, target .setIndex( ++offset ) );
- final RangeToken tok = op.getToken();
- if (!tok.match(ch)) {
- return -1;
- }
- offset ++;
- } else {
- int o1 = offset-1;
- if (o1 >= con.limit || o1 < 0)
- return -1;
- int ch = target .setIndex( o1 ) ;
- if (REUtil.isLowSurrogate(ch) && o1-1 >= 0)
- ch = REUtil.composeFromSurrogates( target .setIndex( --o1 ) , ch);
- final RangeToken tok = op.getToken();
- if (!tok.match(ch)) {
- return -1;
- }
- offset = o1;
- }
- op = op.next;
- break;
-
- case Op.ANCHOR:
- boolean go = false;
- switch (op.getData()) {
- case '^':
- if (isSet(opts, MULTIPLE_LINES)) {
- if (!(offset == con.start
- || offset > con.start && offset < con.limit && isEOLChar( target .setIndex( offset-1 ) )))
- return -1;
- } else {
- if (offset != con.start)
- return -1;
- }
- break;
-
- case '@': // Internal use only.
- // The @ always matches line beginnings.
- if (!(offset == con.start
- || offset > con.start && isEOLChar( target .setIndex( offset-1 ) )))
- return -1;
- break;
+ String regex;
+ /**
+ * @serial
+ */
+ int options;
- case '$':
- if (isSet(opts, MULTIPLE_LINES)) {
- if (!(offset == con.limit
- || offset < con.limit && isEOLChar( target .setIndex( offset ) )))
- return -1;
- } else {
- if (!(offset == con.limit
- || offset+1 == con.limit && isEOLChar( target .setIndex( offset ) )
- || offset+2 == con.limit && target .setIndex( offset ) == CARRIAGE_RETURN
- && target .setIndex( offset+1 ) == LINE_FEED))
- return -1;
- }
- break;
+ /**
+ * The number of parentheses in the regular expression.
+ * @serial
+ */
+ int nofparen;
+ /**
+ * Internal representation of the regular expression.
+ * @serial
+ */
+ Token tokentree;
- case 'A':
- if (offset != con.start) return -1;
- break;
+ boolean hasBackReferences = false;
- case 'Z':
- if (!(offset == con.limit
- || offset+1 == con.limit && isEOLChar( target .setIndex( offset ) )
- || offset+2 == con.limit && target .setIndex( offset ) == CARRIAGE_RETURN
- && target .setIndex( offset+1 ) == LINE_FEED))
- return -1;
- break;
+ transient int minlength;
+ transient Op operations = null;
+ transient int numberOfClosures;
+ transient Context context = null;
+ transient RangeToken firstChar = null;
- case 'z':
- if (offset != con.limit) return -1;
- break;
+ transient String fixedString = null;
+ transient int fixedStringOptions;
+ transient BMPattern fixedStringTable = null;
+ transient boolean fixedStringOnly = false;
- case 'b':
- if (con.length == 0) return -1;
- {
- int after = getWordType(target, con.start, con.limit, offset, opts);
- if (after == WT_IGNORE) return -1;
- int before = getPreviousWordType(target, con.start, con.limit, offset, opts);
- if (after == before) return -1;
- }
- break;
+ static abstract class ExpressionTarget { // Abstraction over the text being matched; StringTarget and CharArrayTarget in this file are concrete implementations.
+ abstract char charAt(int index); // character at the given index of the underlying text
+ abstract boolean regionMatches(boolean ignoreCase, int offset, int limit, String part, int partlen); // compares partlen chars of the target at offset with the first partlen chars of part; the visible implementations return false when fewer than partlen chars lie between offset and limit
+ abstract boolean regionMatches(boolean ignoreCase, int offset, int limit, int offset2, int partlen); // same comparison, but against the target's own region starting at offset2
+ }
+
+ static final class StringTarget extends ExpressionTarget { // ExpressionTarget backed by a java.lang.String.
+
+ private String target; // the wrapped text; replaceable through resetTarget
+
+ StringTarget(String target) {
+ this.target = target;
+ }
+
+ final void resetTarget(String target) { // points this wrapper at a different string
+ this.target = target;
+ }
+
+ final char charAt(int index) {
+ return target.charAt(index);
+ }
+
+ final boolean regionMatches(boolean ignoreCase, int offset, int limit, // compares partlen chars of the target at offset with part[0..partlen)
+ String part, int partlen) {
+ if (limit-offset < partlen) { // fewer than partlen chars lie between offset and limit: cannot match
+ return false;
+ }
+ return (ignoreCase) ? target.regionMatches(true, offset, part, 0, partlen) : target.regionMatches(offset, part, 0, partlen);
+ }
- case 'B':
- if (con.length == 0)
- go = true;
- else {
- int after = getWordType(target, con.start, con.limit, offset, opts);
- go = after == WT_IGNORE
- || after == getPreviousWordType(target, con.start, con.limit, offset, opts);
- }
- if (!go) return -1;
- break;
+ final boolean regionMatches(boolean ignoreCase, int offset, int limit, // compares partlen chars of the target at offset with the target's own region starting at offset2
+ int offset2, int partlen) {
+ if (limit-offset < partlen) { // fewer than partlen chars lie between offset and limit: cannot match
+ return false;
+ }
+ return (ignoreCase) ? target.regionMatches(true, offset, target, offset2, partlen)
+ : target.regionMatches(offset, target, offset2, partlen);
+ }
+ }
+
+ static final class CharArrayTarget extends ExpressionTarget {
+
+ char[] target;
+
+ CharArrayTarget(char[] target) {
+ this.target = target;
+ }
- case '<':
- if (con.length == 0 || offset == con.limit) return -1;
- if (getWordType(target, con.start, con.limit, offset, opts) != WT_LETTER
- || getPreviousWordType(target, con.start, con.limit, offset, opts) != WT_OTHER)
- return -1;
- break;
+ final void resetTarget(char[] target) {
+ this.target = target;
+ }
- case '>':
- if (con.length == 0 || offset == con.start) return -1;
- if (getWordType(target, con.start, con.limit, offset, opts) != WT_OTHER
- || getPreviousWordType(target, con.start, con.limit, offset, opts) != WT_LETTER)
- return -1;
- break;
- } // switch anchor type
- op = op.next;
- break;
+ char charAt(int index) {
+ return target[index];
+ }
+
+ final boolean regionMatches(boolean ignoreCase, int offset, int limit,
+ String part, int partlen) {
+ if (offset < 0 || limit-offset < partlen) {
+ return false;
+ }
+ return (ignoreCase) ? regionMatchesIgnoreCase(offset, limit, part, partlen)
+ : regionMatches(offset, limit, part, partlen);
+ }
- case Op.BACKREFERENCE:
- {
- int refno = op.getData();
- if (refno <= 0 || refno >= this.nofparen)
- throw new RuntimeException("Internal Error: Reference number must be more than zero: "+refno);
- if (con.match.getBeginning(refno) < 0
- || con.match.getEnd(refno) < 0)
- return -1; // ********
- int o2 = con.match.getBeginning(refno);
- int literallen = con.match.getEnd(refno)-o2;
- if (!isSet(opts, IGNORE_CASE)) {
- if (dx > 0) {
- if (!regionMatches(target, offset, con.limit, o2, literallen))
- return -1;
- offset += literallen;
- } else {
- if (!regionMatches(target, offset-literallen, con.limit, o2, literallen))
- return -1;
- offset -= literallen;
- }
- } else {
- if (dx > 0) {
- if (!regionMatchesIgnoreCase(target, offset, con.limit, o2, literallen))
- return -1;
- offset += literallen;
- } else {
- if (!regionMatchesIgnoreCase(target, offset-literallen, con.limit,
- o2, literallen))
- return -1;
- offset -= literallen;
- }
- }
- }
- op = op.next;
- break;
- case Op.STRING:
- {
- String literal = op.getString();
- int literallen = literal.length();
- if (!isSet(opts, IGNORE_CASE)) {
- if (dx > 0) {
- if (!regionMatches(target, offset, con.limit, literal, literallen))
- return -1;
- offset += literallen;
- } else {
- if (!regionMatches(target, offset-literallen, con.limit, literal, literallen))
- return -1;
- offset -= literallen;
- }
- } else {
- if (dx > 0) {
- if (!regionMatchesIgnoreCase(target, offset, con.limit, literal, literallen))
- return -1;
- offset += literallen;
- } else {
- if (!regionMatchesIgnoreCase(target, offset-literallen, con.limit,
- literal, literallen))
- return -1;
- offset -= literallen;
- }
- }
+ private final boolean regionMatches(int offset, int limit, String part, int partlen) {
+ int i = 0;
+ while (partlen-- > 0) {
+ if (target[offset++] != part.charAt(i++)) {
+ return false;
}
- op = op.next;
- break;
+ }
+ return true;
+ }
- case Op.CLOSURE:
- {
- /*
- * Saves current position to avoid
- * zero-width repeats.
- */
- int id = op.getData();
- if (id >= 0) {
- int previousOffset = con.offsets[id];
- if (previousOffset < 0 || previousOffset != offset) {
- con.offsets[id] = offset;
- } else {
- con.offsets[id] = -1;
- op = op.next;
- break;
- }
- }
-
- int ret = this. matchCharacterIterator (con, op.getChild(), offset, dx, opts);
- if (id >= 0) con.offsets[id] = -1;
- if (ret >= 0) return ret;
- op = op.next;
+ private final boolean regionMatchesIgnoreCase(int offset, int limit, String part, int partlen) {
+ int i = 0;
+ while (partlen-- > 0) {
+ final char ch1 = target[offset++] ;
+ final char ch2 = part.charAt(i++);
+ if (ch1 == ch2) {
+ continue;
}
- break;
-
- case Op.QUESTION:
- {
- int ret = this. matchCharacterIterator (con, op.getChild(), offset, dx, opts);
- if (ret >= 0) return ret;
- op = op.next;
+ final char uch1 = Character.toUpperCase(ch1);
+ final char uch2 = Character.toUpperCase(ch2);
+ if (uch1 == uch2) {
+ continue;
}
- break;
-
- case Op.NONGREEDYCLOSURE:
- case Op.NONGREEDYQUESTION:
- {
- int ret = this. matchCharacterIterator (con, op.next, offset, dx, opts);
- if (ret >= 0) return ret;
- op = op.getChild();
+ if (Character.toLowerCase(uch1) != Character.toLowerCase(uch2)) {
+ return false;
}
- break;
+ }
+ return true;
+ }
- case Op.UNION:
- for (int i = 0; i < op.size(); i ++) {
- int ret = this. matchCharacterIterator (con, op.elementAt(i), offset, dx, opts);
- if (DEBUG) {
- System.err.println("UNION: "+i+", ret="+ret);
- }
- if (ret >= 0) return ret;
- }
- return -1;
-
- case Op.CAPTURE:
- int refno = op.getData();
- if (con.match != null && refno > 0) {
- int save = con.match.getBeginning(refno);
- con.match.setBeginning(refno, offset);
- int ret = this. matchCharacterIterator (con, op.next, offset, dx, opts);
- if (ret < 0) con.match.setBeginning(refno, save);
- return ret;
- } else if (con.match != null && refno < 0) {
- int index = -refno;
- int save = con.match.getEnd(index);
- con.match.setEnd(index, offset);
- int ret = this. matchCharacterIterator (con, op.next, offset, dx, opts);
- if (ret < 0) con.match.setEnd(index, save);
- return ret;
- }
- op = op.next;
- break;
+ final boolean regionMatches(boolean ignoreCase, int offset, int limit, int offset2, int partlen) {
+ if (offset < 0 || limit-offset < partlen) {
+ return false;
+ }
+ return (ignoreCase) ? regionMatchesIgnoreCase(offset, limit, offset2, partlen)
+ : regionMatches(offset, limit, offset2, partlen);
+ }
- case Op.LOOKAHEAD:
- if (0 > this. matchCharacterIterator (con, op.getChild(), offset, 1, opts)) return -1;
- op = op.next;
- break;
- case Op.NEGATIVELOOKAHEAD:
- if (0 <= this. matchCharacterIterator (con, op.getChild(), offset, 1, opts)) return -1;
- op = op.next;
- break;
- case Op.LOOKBEHIND:
- if (0 > this. matchCharacterIterator (con, op.getChild(), offset, -1, opts)) return -1;
- op = op.next;
- break;
- case Op.NEGATIVELOOKBEHIND:
- if (0 <= this. matchCharacterIterator (con, op.getChild(), offset, -1, opts)) return -1;
- op = op.next;
- break;
+ private final boolean regionMatches(int offset, int limit, int offset2, int partlen) {
+ int i = offset2;
+ while (partlen-- > 0) {
+ if ( target [ offset++ ] != target [ i++ ] )
+ return false;
+ }
+ return true;
+ }
- case Op.INDEPENDENT:
- {
- int ret = this. matchCharacterIterator (con, op.getChild(), offset, dx, opts);
- if (ret < 0) return ret;
- offset = ret;
- op = op.next;
+ private final boolean regionMatchesIgnoreCase(int offset, int limit, int offset2, int partlen) {
+ int i = offset2;
+ while (partlen-- > 0) {
[... 329 lines stripped ...]
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@xerces.apache.org
For additional commands, e-mail: commits-help@xerces.apache.org