You are viewing a plain text version of this content. The canonical link for it is here.
Posted to oro-dev@jakarta.apache.org by df...@apache.org on 2001/01/29 01:19:01 UTC
cvs commit: jakarta-oro/src/java/org/apache/oro/text/regex OpCode.java Perl5Compiler.java Perl5Debug.java Perl5Matcher.java
dfs 01/01/28 16:19:01
Modified: . CHANGES TODO
build build-oro.xml
src/java/org/apache/oro/text/regex OpCode.java
Perl5Compiler.java Perl5Debug.java
Perl5Matcher.java
Added: . CONTRIBUTORS
Log:
o Applied a modified version of Takashi Okamoto's Unicode and POSIX character
class patch to OpCode, Perl5Compiler, Perl5Debug, and Perl5Matcher.
o Removed Unicode from the TODO and added improve/optimize Unicode classes.
o Added a CONTRIBUTORS file to keep track of those who have contributed code
to the project.
o Incremented release to 2.0.2-dev-2
Revision Changes Path
1.3 +13 -2 jakarta-oro/CHANGES
Index: CHANGES
===================================================================
RCS file: /home/cvs/jakarta-oro/CHANGES,v
retrieving revision 1.2
retrieving revision 1.3
diff -u -r1.2 -r1.3
--- CHANGES 2000/12/24 03:34:23 1.2
+++ CHANGES 2001/01/29 00:19:00 1.3
@@ -1,6 +1,17 @@
-$Id: CHANGES,v 1.2 2000/12/24 03:34:23 dfs Exp $
+$Id: CHANGES,v 1.3 2001/01/29 00:19:00 dfs Exp $
-Version 2.0.2-dev
+Version 2.0.2-dev-2
+
+o Applied a modified version of Takashi Okamoto's unicode/posix patch.
+ It adds unicode support to character classes and adds partial support
+ for posix classes (it supports things like [:digit:] and [:print:], but
+ not [:^digit:] and [:^print:]). It will be improved/optimized later, but
+ gives people the functionality they need today.
+
+Version 2.0.2-dev-1
+
+o Removed commented out code and changed OpCode._isWordCharacter() to
+ use Character.isLetterOrDigit()
o Some documentation fixes.
1.2 +2 -3 jakarta-oro/TODO
Index: TODO
===================================================================
RCS file: /home/cvs/jakarta-oro/TODO,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- TODO 2000/07/23 23:08:28 1.1
+++ TODO 2001/01/29 00:19:00 1.2
@@ -1,7 +1,6 @@
-$Id: TODO,v 1.1 2000/07/23 23:08:28 jon Exp $
+$Id: TODO,v 1.2 2001/01/29 00:19:00 dfs Exp $
-o Make Perl5 character classes (e.g., [abcde...]) fully support Unicode
- input. Currently character classes only match 8-bit characters.
+o Optimize/improve Unicode character classes.
o Fix any pending bugs listed in BUGS file.
1.1 jakarta-oro/CONTRIBUTORS
Index: CONTRIBUTORS
===================================================================
$Id: CONTRIBUTORS,v 1.1 2001/01/29 00:19:00 dfs Exp $
Daniel Savarese <df...@savarese.org> is the original author of the
OROMatcher, PerlTools, AwkTools, and TextTools packages that became
the Jakarta-ORO project.
Takashi Okamoto <to...@rd.nttdata.co.jp> has contributed a unicode
character class fix and an initial posix character class implementation.
1.9 +2 -2 jakarta-oro/build/build-oro.xml
Index: build-oro.xml
===================================================================
RCS file: /home/cvs/jakarta-oro/build/build-oro.xml,v
retrieving revision 1.8
retrieving revision 1.9
diff -u -r1.8 -r1.9
--- build-oro.xml 2000/12/24 03:34:23 1.8
+++ build-oro.xml 2001/01/29 00:19:00 1.9
@@ -1,5 +1,5 @@
<?xml version="1.0"?>
-<!-- $Id: build-oro.xml,v 1.8 2000/12/24 03:34:23 dfs Exp $ -->
+<!-- $Id: build-oro.xml,v 1.9 2001/01/29 00:19:00 dfs Exp $ -->
<project name="Jakarta-ORO" default="main" basedir=".">
@@ -8,7 +8,7 @@
<target name="init">
<property name="Name" value="Jakarta-ORO"/>
<property name="year" value="2000"/>
- <property name="version" value="2.0.2-dev-1"/>
+ <property name="version" value="2.0.2-dev-2"/>
<property name="project" value="jakarta-oro"/>
<property name="build.compiler" value="classic"/>
<property name="code.src" value="../src"/>
1.4 +25 -5 jakarta-oro/src/java/org/apache/oro/text/regex/OpCode.java
Index: OpCode.java
===================================================================
RCS file: /home/cvs/jakarta-oro/src/java/org/apache/oro/text/regex/OpCode.java,v
retrieving revision 1.3
retrieving revision 1.4
diff -u -r1.3 -r1.4
--- OpCode.java 2001/01/28 22:21:57 1.3
+++ OpCode.java 2001/01/29 00:19:00 1.4
@@ -63,7 +63,7 @@
* op-codes used in a compiled regular expression.
@author <a href="mailto:dfs@savarese.org">Daniel F. Savarese</a>
- @version $Id: OpCode.java,v 1.3 2001/01/28 22:21:57 dfs Exp $
+ @version $Id: OpCode.java,v 1.4 2001/01/29 00:19:00 dfs Exp $
*/
final class OpCode {
@@ -107,19 +107,36 @@
_IFMATCH = 31, // no Succeeds if the following matches.
_UNLESSM = 32, // no Fails if the following matches.
_SUCCEED = 33, // no Return from a subroutine, basically.
- _WHILEM = 34; // no Do curly processing and see if rest matches.
+ _WHILEM = 34, // no Do curly processing and see if rest matches.
+ _ANYOFUN = 35, // yes Match unicode character in this class.
+ _NANYOFUN= 36, // yes Match unicode character not in this class.
+ _RANGE = 37, // yes Range flag in
+ // Change the names of these constants later to make it clear they
+ // are POSIX classes.
+ _ALPHA = 38,
+ _BLANK = 39,
+ _CNTRL = 40,
+ _GRAPH = 41,
+ _LOWER = 42,
+ _PRINT = 43,
+ _PUNCT = 44,
+ _UPPER = 45,
+ _XDIGIT = 46,
+ _OPCODE = 47,
+ _ONECHAR = 48;
// Lengths of the various operands.
static final int _operandLength[] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0
};
static final char _opType[] = {
_END, _BOL, _BOL, _BOL, _EOL, _EOL, _EOL, _ANY, _ANY, _ANYOF, _CURLY,
_CURLY, _BRANCH, _BACK, _EXACTLY, _NOTHING, _STAR, _PLUS, _ALNUM,
_NALNUM, _BOUND, _NBOUND, _SPACE, _NSPACE, _DIGIT, _NDIGIT, _REF,
- _OPEN, _CLOSE, _MINMOD, _BOL, _BRANCH, _BRANCH, _END, _WHILEM
+ _OPEN, _CLOSE, _MINMOD, _BOL, _BRANCH, _BRANCH, _END, _WHILEM,
+ _ANYOFUN, _NANYOFUN
};
static final char _opLengthVaries[] = {
@@ -127,7 +144,8 @@
};
static final char _opLengthOne[] = {
- _ANY, _SANY, _ANYOF, _ALNUM, _NALNUM, _SPACE, _NSPACE, _DIGIT, _NDIGIT
+ _ANY, _SANY, _ANYOF, _ALNUM, _NALNUM, _SPACE, _NSPACE, _DIGIT, _NDIGIT,
+ _ANYOFUN, _NANYOFUN
};
static final int _NULL_OFFSET = -1;
@@ -181,5 +199,7 @@
// Matches Perl's definition of \w, which is different from [:alnum:]
static final boolean _isWordCharacter(char token) {
return (Character.isLetterOrDigit(token) || token == '_');
- }
+ }
}
+
+
1.6 +236 -1 jakarta-oro/src/java/org/apache/oro/text/regex/Perl5Compiler.java
Index: Perl5Compiler.java
===================================================================
RCS file: /home/cvs/jakarta-oro/src/java/org/apache/oro/text/regex/Perl5Compiler.java,v
retrieving revision 1.5
retrieving revision 1.6
diff -u -r1.5 -r1.6
--- Perl5Compiler.java 2001/01/28 22:21:57 1.5
+++ Perl5Compiler.java 2001/01/29 00:19:01 1.6
@@ -57,6 +57,8 @@
* by Daniel F. Savarese. We appreciate his contributions.
*/
+import java.util.Hashtable;
+
/**
* The Perl5Compiler class is used to create compiled regular expressions
* conforming to the Perl5 regular expression syntax. It generates
@@ -65,7 +67,7 @@
* information about Perl5 regular expressions.
@author <a href="mailto:dfs@savarese.org">Daniel F. Savarese</a>
- @version $Id: Perl5Compiler.java,v 1.5 2001/01/28 22:21:57 dfs Exp $
+ @version $Id: Perl5Compiler.java,v 1.6 2001/01/29 00:19:01 dfs Exp $
* @see PatternCompiler
* @see MalformedPatternException
@@ -103,6 +105,26 @@
// keeps track of the current offset.
private char[] __program;
+ /** Lookup table from POSIX class name (e.g. "alpha") to the escape letter or OpCode constant that __parseUnicodeClass emits for it. */
+ private static final Hashtable __hashPOSIX;
+
+ static {
+ __hashPOSIX = new Hashtable(); // Hashtable is the only java.util class this file imports
+ __hashPOSIX.put("alnum", new Character('w')); // 'w' is turned into OpCode._ALNUM by the switch in __parseUnicodeClass
+ __hashPOSIX.put("alpha", new Character(OpCode._ALPHA));
+ __hashPOSIX.put("blank", new Character(OpCode._BLANK));
+ __hashPOSIX.put("cntrl", new Character(OpCode._CNTRL));
+ __hashPOSIX.put("digit", new Character('d')); // 'd' is turned into OpCode._DIGIT by the same switch
+ __hashPOSIX.put("graph", new Character(OpCode._GRAPH));
+ __hashPOSIX.put("lower", new Character(OpCode._LOWER));
+ __hashPOSIX.put("print", new Character(OpCode._PRINT));
+ __hashPOSIX.put("punct", new Character(OpCode._PUNCT));
+ __hashPOSIX.put("space", new Character('s')); // 's' is turned into OpCode._SPACE by the same switch
+ __hashPOSIX.put("upper", new Character(OpCode._UPPER));
+ __hashPOSIX.put("xdigit", new Character(OpCode._XDIGIT));
+ }
+
+
/**
* The default mask for the {@link #compile compile} methods.
* It is equal to 0.
@@ -567,7 +589,7 @@
case '[':
__input._increment();
- offset = __parseCharacterClass();
+ offset = __parseUnicodeClass();
retFlags[0] |= (__NONNULL | __SIMPLE);
break tryAgain;
@@ -1078,6 +1100,219 @@
__getNextChar();
return offset;
+ }
+
+ /** Parses a [...] class body (the caller has already stepped past '['), emitting an _ANYOFUN or _NANYOFUN node followed by _ONECHAR/_RANGE/_OPCODE entries and a closing _END; returns the node offset and throws MalformedPatternException on a reversed range or a missing ']'. */
+ private int __parseUnicodeClass() throws MalformedPatternException {
+ boolean range = false, skipTest;
+ char clss, lastclss = Character.MAX_VALUE;
+ int offset, numLength[] = { 0 };
+ boolean opcodeFlag; /* true when clss holds an OpCode class constant rather than a literal character. */
+
+ if(__input._getValue() == '^') {
+ offset = __emitNode(OpCode._NANYOFUN);
+ __input._increment();
+ } else {
+ offset = __emitNode(OpCode._ANYOFUN);
+ }
+
+ clss = __input._getValue();
+
+ if(clss == ']' || clss == '-')
+ skipTest = true;
+ else
+ skipTest = false;
+
+ while((!__input._isAtEnd() && (clss = __input._getValue()) != ']')
+ || skipTest) {
+ // skipTest only forces the first pass (a leading ']' or '-' is taken literally), so it is cleared every time
+ skipTest = false;
+ opcodeFlag = false;
+ __input._increment();
+
+ if(clss == '\\' || clss == '[') {
+ if(clss == '\\') {
+ /* character is escaped */
+ clss = __input._postIncrement();
+ } else {
+ /* try POSIX expression; on failure clss stays '[' and is handled as a literal */
+ char posixOpCode = __parsePOSIX();
+ if(posixOpCode != 0){
+ opcodeFlag = true;
+ clss = posixOpCode;
+ }
+ }
+
+ switch(clss){
+ case 'w':
+ opcodeFlag = true;
+ clss = OpCode._ALNUM;
+ lastclss = Character.MAX_VALUE;
+ break;
+ case 'W':
+ opcodeFlag = true;
+ clss = OpCode._NALNUM;
+ lastclss = Character.MAX_VALUE;
+ break;
+ case 's':
+ opcodeFlag = true;
+ clss = OpCode._SPACE;
+ lastclss = Character.MAX_VALUE;
+ break;
+ case 'S':
+ opcodeFlag = true;
+ clss = OpCode._NSPACE;
+ lastclss = Character.MAX_VALUE;
+ break;
+ case 'd':
+ opcodeFlag = true;
+ clss = OpCode._DIGIT;
+ lastclss = Character.MAX_VALUE;
+ break;
+ case 'D':
+ opcodeFlag = true;
+ clss = OpCode._NDIGIT;
+ lastclss = Character.MAX_VALUE;
+ break;
+ case 'n':
+ clss = '\n';
+ break;
+ case 'r':
+ clss = '\r';
+ break;
+ case 't':
+ clss = '\t';
+ break;
+ case 'f':
+ clss = '\f';
+ break;
+ case 'b':
+ clss = '\b';
+ break;
+ case 'e':
+ clss = '\033';
+ break;
+ case 'a':
+ clss = '\007';
+ break;
+ case 'x':
+ clss = (char)__parseHex(__input._array, __input._getOffset(), 2,
+ numLength);
+ __input._increment(numLength[0]);
+ break;
+ case 'c':
+ clss = __input._postIncrement();
+ if(Character.isLowerCase(clss))
+ clss = Character.toUpperCase(clss);
+ clss ^= 64; // flip bit 6 of the upper-cased letter to form the control character
+ break;
+ case '0': case '1': case '2': case '3': case '4':
+ case '5': case '6': case '7': case '8': case '9':
+ clss = (char)__parseOctal(__input._array, __input._getOffset() - 1,
+ 3, numLength);
+ __input._increment(numLength[0] - 1);
+ break;
+ default: // any other character is kept unchanged
+ }
+ }
+
+ if(range) {
+ if(lastclss > clss)
+ throw new MalformedPatternException(
+ "Invalid [] range in expression.");
+ range = false;
+ } else {
+ lastclss = clss;
+
+ if(__input._getValue() == '-' &&
+ __input._getOffset() + 1 < __input._getLength() &&
+ __input._getValueRelative(1) != ']') {
+ __input._increment();
+ range = true;
+ continue;
+ }
+ }
+
+ if(lastclss == clss) {
+ if(opcodeFlag == true) {
+ __emitCode(OpCode._OPCODE);
+ } else {
+ __emitCode(OpCode._ONECHAR);
+ }
+ __emitCode(clss);
+
+ if((__modifierFlags[0] & __CASE_INSENSITIVE) != 0 &&
+ Character.isUpperCase(clss) && Character.isUpperCase(lastclss)){
+ __programSize--; // NOTE(review): presumably rewinds one slot so the lowercase form overwrites the code just emitted - confirm against __emitCode
+ __emitCode(Character.toLowerCase(clss));
+ }
+ }
+ if(lastclss < clss) {
+ __emitCode(OpCode._RANGE);
+ __emitCode(lastclss);
+ __emitCode(clss);
+
+ if((__modifierFlags[0] & __CASE_INSENSITIVE) != 0 &&
+ Character.isUpperCase(clss) && Character.isUpperCase(lastclss)){
+ __programSize-=2; // NOTE(review): presumably rewinds over both range bounds so they are re-emitted in lowercase - confirm against __emitCode
+ __emitCode(Character.toLowerCase(lastclss));
+ __emitCode(Character.toLowerCase(clss));
+
+
+ }
+ lastclss = Character.MAX_VALUE;
+ range = false;
+ }
+
+ lastclss = clss;
+ }
+
+ if(__input._getValue() != ']')
+ throw new MalformedPatternException("Unmatched [] in expression.");
+
+ __getNextChar();
+ __emitCode(OpCode._END);
+ return offset;
+ }
+
+
+ /**
+ * Parses a POSIX class expression like [:foo:], starting just after the '[' that __parseUnicodeClass has consumed.
+ *
+ * @return The code stored in __hashPOSIX for the class name, or 0 if parsing fails (the input offset is moved only on success).
+ */
+ private char __parsePOSIX() throws MalformedPatternException {
+ int offset = __input._getOffset();
+ int len = __input._getLength();
+ int pos = offset;
+ char value = __input._getValue(pos++);
+ StringBuffer buf;
+ Object opcode;
+
+ if( value != ':' ) return 0;
+
+ buf = new StringBuffer();
+
+ try {
+ while ( (value = __input._getValue(pos++)) != ':' && pos < len) {
+ buf.append(value);
+ }
+ } catch (Exception e){
+ return 0;
+ }
+
+ if( __input._getValue(pos++) != ']'){
+ return 0;
+ }
+
+ opcode = __hashPOSIX.get(buf.toString());
+
+ if( opcode == null )
+ return 0;
+
+ __input._setOffset(pos);
+
+ return ((Character)opcode).charValue();
1.3 +7 -3 jakarta-oro/src/java/org/apache/oro/text/regex/Perl5Debug.java
Index: Perl5Debug.java
===================================================================
RCS file: /home/cvs/jakarta-oro/src/java/org/apache/oro/text/regex/Perl5Debug.java,v
retrieving revision 1.2
retrieving revision 1.3
diff -u -r1.2 -r1.3
--- Perl5Debug.java 2000/07/23 23:25:26 1.2
+++ Perl5Debug.java 2001/01/29 00:19:01 1.3
@@ -68,7 +68,7 @@
* comparison with the program generated by Perl5 with the -r option.
@author <a href="mailto:dfs@savarese.org">Daniel F. Savarese</a>
- @version $Id: Perl5Debug.java,v 1.2 2000/07/23 23:25:26 jon Exp $
+ @version $Id: Perl5Debug.java,v 1.3 2001/01/29 00:19:01 dfs Exp $
* @see Perl5Pattern
*/
@@ -121,9 +121,11 @@
if(operator == OpCode._ANYOF) {
offset += 16;
+ } else if(operator == OpCode._ANYOFUN || operator == OpCode._NANYOFUN) {
+ offset+=(prog[offset-1]-2);
} else if(operator == OpCode._EXACTLY) {
- ++offset;
- buffer.append(" <");
+ ++offset;
+ buffer.append(" <");
//while(prog[offset] != '0')
while(prog[offset] != CharStringPointer._END_OF_STRING) {
@@ -176,6 +178,8 @@
case OpCode._ANY : str = "ANY"; break;
case OpCode._SANY : str = "SANY"; break;
case OpCode._ANYOF : str = "ANYOF"; break;
+ case OpCode._ANYOFUN : str = "ANYOFUN"; break;
+ case OpCode._NANYOFUN : str = "NANYOFUN"; break;
/*
case OpCode._ANYOF : // debug
buffer.append("ANYOF\n\n");
1.8 +140 -26 jakarta-oro/src/java/org/apache/oro/text/regex/Perl5Matcher.java
Index: Perl5Matcher.java
===================================================================
RCS file: /home/cvs/jakarta-oro/src/java/org/apache/oro/text/regex/Perl5Matcher.java,v
retrieving revision 1.7
retrieving revision 1.8
diff -u -r1.7 -r1.8
--- Perl5Matcher.java 2001/01/28 22:21:57 1.7
+++ Perl5Matcher.java 2001/01/29 00:19:01 1.8
@@ -66,7 +66,7 @@
* Perl5Compiler.
@author <a href="mailto:dfs@savarese.org">Daniel F. Savarese</a>
- @version $Id: Perl5Matcher.java,v 1.7 2001/01/28 22:21:57 dfs Exp $
+ @version $Id: Perl5Matcher.java,v 1.8 2001/01/29 00:19:01 dfs Exp $
* @see PatternMatcher
* @see Perl5Compiler
@@ -402,6 +402,7 @@
if((offset = expression._startClassOffset) != OpCode._NULL_OFFSET) {
boolean doEvery, tmp;
+ char op;
doEvery = ((expression._anchor & Perl5Pattern._OPT_SKIP) == 0);
@@ -410,7 +411,7 @@
endOffset -= dontTry;
tmp = true;
- switch(__program[offset]) {
+ switch(op = __program[offset]) {
case OpCode._ANYOF:
offset = OpCode._getOperand(offset);
while(__currentOffset < endOffset) {
@@ -430,6 +431,25 @@
break;
+ case OpCode._ANYOFUN:
+ case OpCode._NANYOFUN:
+ offset = OpCode._getOperand(offset);
+ while(__currentOffset < endOffset) {
+ ch = __input[__currentOffset];
+
+ if(__matchUnicodeClass(ch, __program, offset, op)) {
+ if(tmp && __tryExpression(expression, __currentOffset)) {
+ success = true;
+ break _mainLoop;
+ } else
+ tmp = doEvery;
+ } else
+ tmp = true;
+ ++__currentOffset;
+ }
+
+ break;
+
case OpCode._BOUND:
if(minLength > 0) {
++dontTry;
@@ -603,11 +623,94 @@
return success;
}
+ /** Tests code against the class entries in __program from offset up to OpCode._END: a hit returns true when opcode is _ANYOFUN and false otherwise (the negated _NANYOFUN case); no hit returns the opposite. */
+ private boolean __matchUnicodeClass(char code, char __program[],
+ int offset ,char opcode)
+ {
+ boolean isANYOF = ( opcode == OpCode._ANYOFUN );
+ while( __program[offset] != OpCode._END ){
+ if( __program[offset] == OpCode._RANGE ){
+ offset++;
+ if((code >= __program[offset]) && (code <= __program[offset+1])){
+ return isANYOF;
+ } else {
+ offset+=2; // step over the range's low and high bounds
+ }
+ } else if( __program[offset] == OpCode._OPCODE ){
+ offset++;
+ switch ( __program[offset++] ) {
+ case OpCode._ALNUM:
+ if(OpCode._isWordCharacter(code)) return isANYOF;
+ break;
+ case OpCode._NALNUM:
+ if(!OpCode._isWordCharacter(code)) return isANYOF;
+ break;
+ case OpCode._SPACE:
+ if(Character.isWhitespace(code)) return isANYOF;
+ break;
+ case OpCode._NSPACE:
+ if(!Character.isWhitespace(code)) return isANYOF;
+ break;
+ case OpCode._DIGIT:
+ if(Character.isDigit(code)) return isANYOF;
+ break;
+ case OpCode._NDIGIT:
+ if(!Character.isDigit(code)) return isANYOF;
+ break;
+ case OpCode._BLANK:
+ if(Character.isSpaceChar(code)) return isANYOF;
+ break;
+ case OpCode._CNTRL:
+ if(Character.isISOControl(code)) return isANYOF;
+ break;
+ case OpCode._LOWER:
+ if(Character.isLowerCase(code)) return isANYOF;
+ break;
+ case OpCode._UPPER:
+ if(Character.isUpperCase(code)) return isANYOF;
+ break;
+ case OpCode._PRINT:
+ if(Character.isSpaceChar(code)) return isANYOF;
+ // Fall through to check if the character is alphanumeric,
+ // or a punctuation mark. Printable characters are either
+ // alphanumeric, punctuation marks, or spaces.
+ case OpCode._GRAPH:
+ if(Character.isLetterOrDigit(code))
+ return isANYOF;
+ // Fall through to check if the character is a punctuation mark.
+ // Graph characters are either alphanumeric or punctuation.
+ case OpCode._PUNCT:
+ switch ( Character.getType(code) ) {
+ case Character.DASH_PUNCTUATION:
+ case Character.START_PUNCTUATION:
+ case Character.END_PUNCTUATION:
+ case Character.CONNECTOR_PUNCTUATION:
+ case Character.OTHER_PUNCTUATION:
+ return isANYOF;
+ default:
+ break;
+ }
+ break;
+ case OpCode._XDIGIT:
+ if( (code >= '0' && code <= '9') ||
+ (code >= 'a' && code <= 'f') ||
+ (code >= 'A' && code <= 'F')) return isANYOF;
+ break;
+ }
+ } else if((__program[offset++] == OpCode._ONECHAR) &&
+ (__program[offset++] == code))
+ {
+ return isANYOF;
+ }
+ }
+ return !isANYOF; // nothing in the class matched
+ }
+
private boolean __tryExpression(Perl5Pattern expression, int offset) {
int count;
-
+
__inputOffset = offset;
__lastParen = 0;
__expSize = 0;
@@ -632,6 +735,7 @@
private int __repeat(int offset, int max) {
int scan, eol, operand, ret;
char ch;
+ char op;
scan = __inputOffset;
eol = __eol;
@@ -641,7 +745,7 @@
operand = OpCode._getOperand(offset);
- switch(__program[offset]) {
+ switch(op = __program[offset]) {
case OpCode._ANY:
while(scan < eol && __input[scan] != '\n')
@@ -660,7 +764,20 @@
case OpCode._ANYOF:
if(scan < eol && (ch = __input[scan]) < 256) {
- while((__program[operand + (ch >> 4)] & (1 << (ch & 0xf))) == 0) {
+ while((ch < 256 ) && (__program[operand + (ch >> 4)] & (1 << (ch & 0xf))) == 0) {
+ if(++scan < eol)
+ ch = __input[scan];
+ else
+ break;
+ }
+ }
+ break;
+
+ case OpCode._ANYOFUN:
+ case OpCode._NANYOFUN:
+ if(scan < eol) {
+ ch = __input[scan];
+ while(__matchUnicodeClass(ch, __program, operand, op)){
if(++scan < eol)
ch = __input[scan];
else
@@ -820,6 +937,23 @@
nextChar = (inputRemains ? __input[input] : __EOS);
break;
+ case OpCode._ANYOFUN:
+ case OpCode._NANYOFUN:
+ current = OpCode._getOperand(scan);
+
+ if(nextChar == __EOS && inputRemains)
+ nextChar = __input[input];
+
+ if(!__matchUnicodeClass(nextChar, __program, current, op))
+ return false;
+
+ if(!inputRemains && input >= __eol)
+ return false;
+
+ inputRemains = (++input < __endOffset);
+ nextChar = (inputRemains ? __input[input] : __EOS);
+ break;
+
case OpCode._ALNUM:
if(!inputRemains)
return false;
@@ -1389,13 +1523,7 @@
__originalInput = input;
if(expression._isCaseInsensitive)
input = _toLower(input);
- /*
- if(__interpret(expression, input, 0, input.length)) {
- if(__lastMatchResult.beginOffset(0) == 0 &&
- __lastMatchResult.endOffset(0) == input.length)
- return true;
- }
- */
+
__initInterpreterGlobals(expression, input, 0, input.length, 0);
__lastSuccess = (__tryExpression(expression, 0) &&
__endMatchOffsets[0] == input.length);
@@ -1549,20 +1677,6 @@
* Perl5Pattern is passed as the pattern parameter.
*/
public boolean contains(String input, Pattern pattern) {
- /*
- char[] inp;
- Perl5Pattern expression;
-
- expression = (Perl5Pattern)pattern;
-
- __originalInput = inp = input.toCharArray();
-
- if(expression._isCaseInsensitive)
- //_toLower(inp, false);
- inp = _toLower(inp, false);
-
- return __interpret(expression, inp, 0, inp.length);
- */
return contains(input.toCharArray(), pattern);
}