You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@xerces.apache.org by mr...@apache.org on 2005/06/23 17:55:18 UTC

cvs commit: xml-xerces/java/src/org/apache/xerces/impl/xpath/regex ParserForXMLSchema.java

mrglavas    2005/06/23 08:55:18

  Modified:    java/src/org/apache/xerces/impl/xpath/regex
                        ParserForXMLSchema.java
  Log:
  Implementing first edition erratum E2-67:
  http://www.w3.org/2001/05/xmlschema-errata#e2-67
  
  This re-allows character ranges such as [-+] and [+-].
  The XHTML schema [1] makes use of such character ranges
  and was rejected by Xerces 2.6.2. We're now able to
  process it.
  
  Thanks to Ankit Pasricha for providing this patch.
  
  [1] http://www.w3.org/2002/08/xhtml/xhtml1-strict.xsd
  
  Revision  Changes    Path
  1.9       +19 -4     xml-xerces/java/src/org/apache/xerces/impl/xpath/regex/ParserForXMLSchema.java
  
  Index: ParserForXMLSchema.java
  ===================================================================
  RCS file: /home/cvs/xml-xerces/java/src/org/apache/xerces/impl/xpath/regex/ParserForXMLSchema.java,v
  retrieving revision 1.8
  retrieving revision 1.9
  diff -u -r1.8 -r1.9
  --- ParserForXMLSchema.java	22 Mar 2005 03:22:37 -0000	1.8
  +++ ParserForXMLSchema.java	23 Jun 2005 15:55:18 -0000	1.9
  @@ -169,6 +169,7 @@
           this.setContext(S_INBRACKETS);
           this.next();                            // '['
           boolean nrange = false;
  +        boolean wasDecoded = false;     		// used to detect if the last - was escaped.
           RangeToken base = null;
           RangeToken tok;
           if (this.read() == T_CHAR && this.chardata == '^') {
  @@ -183,6 +184,8 @@
           int type;
           boolean firstloop = true;
           while ((type = this.read()) != T_EOF) { // Don't use 'cotinue' for this loop.
  +        	
  +        	wasDecoded = false;
               // single-range | from-to-range | subtraction
               if (type == T_CHAR && this.chardata == ']' && !firstloop) {
                   if (nrange) {
  @@ -216,6 +219,11 @@
                       tok.mergeRanges(tok2);
                       end = true;
                       break;
  +                   
  +                 case '-':
  +                 	c = this.decodeEscaped();
  +                 	wasDecoded = true;
  +                 	break;
   
                     default:
                       c = this.decodeEscaped();
  @@ -238,7 +246,10 @@
                   if (type == T_CHAR) {
                       if (c == '[')  throw this.ex("parser.cc.6", this.offset-2);
                       if (c == ']')  throw this.ex("parser.cc.7", this.offset-2);
  -                    if (c == '-')  throw this.ex("parser.cc.8", this.offset-2);
  +                    if (c == '-' && this.chardata == ']' && firstloop)  throw this.ex("parser.cc.8", this.offset-2);	// if regex = '[-]' then invalid
  +                }
  +                if(c == '-' && this.chardata == '-' && this.read() != T_BACKSOLIDUS && !wasDecoded) {
  +                	throw this.ex("parser.cc.8", this.offset-2);
                   }
                   if (this.read() != T_CHAR || this.chardata != '-') { // Here is no '-'.
                       tok.addRange(c, c);
  @@ -247,10 +258,14 @@
                       this.next(); // Skips '-'
                       if ((type = this.read()) == T_EOF)  throw this.ex("parser.cc.2", this.offset);
                                                   // c '-' ']' -> '-' is a single-range.
  -                    if ((type == T_CHAR && this.chardata == ']')
  -                        || type == T_XMLSCHEMA_CC_SUBTRACTION) {
  +                    if(type == T_CHAR && this.chardata == ']') {				// if - is at the last position of the group
  +                    	tok.addRange(c, c);
  +                    	tok.addRange('-', '-');
  +                    }
  +                    else if (type == T_XMLSCHEMA_CC_SUBTRACTION) {
                           throw this.ex("parser.cc.8", this.offset-1);
                       } else {
  +                    	
                           int rangeend = this.chardata;
                           if (type == T_CHAR) {
                               if (rangeend == '[')  throw this.ex("parser.cc.6", this.offset-1);
  
  
  

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@xerces.apache.org
For additional commands, e-mail: commits-help@xerces.apache.org