You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@xerces.apache.org by mr...@apache.org on 2005/06/23 17:55:18 UTC
cvs commit: xml-xerces/java/src/org/apache/xerces/impl/xpath/regex ParserForXMLSchema.java
mrglavas 2005/06/23 08:55:18
Modified: java/src/org/apache/xerces/impl/xpath/regex
ParserForXMLSchema.java
Log:
Implementing first edition erratum E2-67:
http://www.w3.org/2001/05/xmlschema-errata#e2-67
This re-allows character ranges which look like [-+] and [+-].
The XHTML schema [1] makes use of such character ranges
and was rejected by Xerces 2.6.2. We're now able to
process it.
Thanks to Ankit Pasricha for providing this patch.
[1] http://www.w3.org/2002/08/xhtml/xhtml1-strict.xsd
Revision Changes Path
1.9 +19 -4 xml-xerces/java/src/org/apache/xerces/impl/xpath/regex/ParserForXMLSchema.java
Index: ParserForXMLSchema.java
===================================================================
RCS file: /home/cvs/xml-xerces/java/src/org/apache/xerces/impl/xpath/regex/ParserForXMLSchema.java,v
retrieving revision 1.8
retrieving revision 1.9
diff -u -r1.8 -r1.9
--- ParserForXMLSchema.java 22 Mar 2005 03:22:37 -0000 1.8
+++ ParserForXMLSchema.java 23 Jun 2005 15:55:18 -0000 1.9
@@ -169,6 +169,7 @@
this.setContext(S_INBRACKETS);
this.next(); // '['
boolean nrange = false;
+ boolean wasDecoded = false; // used to detect if the last - was escaped.
RangeToken base = null;
RangeToken tok;
if (this.read() == T_CHAR && this.chardata == '^') {
@@ -183,6 +184,8 @@
int type;
boolean firstloop = true;
while ((type = this.read()) != T_EOF) { // Don't use 'cotinue' for this loop.
+
+ wasDecoded = false;
// single-range | from-to-range | subtraction
if (type == T_CHAR && this.chardata == ']' && !firstloop) {
if (nrange) {
@@ -216,6 +219,11 @@
tok.mergeRanges(tok2);
end = true;
break;
+
+ case '-':
+ c = this.decodeEscaped();
+ wasDecoded = true;
+ break;
default:
c = this.decodeEscaped();
@@ -238,7 +246,10 @@
if (type == T_CHAR) {
if (c == '[') throw this.ex("parser.cc.6", this.offset-2);
if (c == ']') throw this.ex("parser.cc.7", this.offset-2);
- if (c == '-') throw this.ex("parser.cc.8", this.offset-2);
+ if (c == '-' && this.chardata == ']' && firstloop) throw this.ex("parser.cc.8", this.offset-2); // if regex = '[-]' then invalid
+ }
+ if(c == '-' && this.chardata == '-' && this.read() != T_BACKSOLIDUS && !wasDecoded) {
+ throw this.ex("parser.cc.8", this.offset-2);
}
if (this.read() != T_CHAR || this.chardata != '-') { // Here is no '-'.
tok.addRange(c, c);
@@ -247,10 +258,14 @@
this.next(); // Skips '-'
if ((type = this.read()) == T_EOF) throw this.ex("parser.cc.2", this.offset);
// c '-' ']' -> '-' is a single-range.
- if ((type == T_CHAR && this.chardata == ']')
- || type == T_XMLSCHEMA_CC_SUBTRACTION) {
+ if(type == T_CHAR && this.chardata == ']') { // if - is at the last position of the group
+ tok.addRange(c, c);
+ tok.addRange('-', '-');
+ }
+ else if (type == T_XMLSCHEMA_CC_SUBTRACTION) {
throw this.ex("parser.cc.8", this.offset-1);
} else {
+
int rangeend = this.chardata;
if (type == T_CHAR) {
if (rangeend == '[') throw this.ex("parser.cc.6", this.offset-1);
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@xerces.apache.org
For additional commands, e-mail: commits-help@xerces.apache.org