You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@xerces.apache.org by mr...@apache.org on 2014/11/11 21:15:46 UTC

svn commit: r1638344 - in /xerces/java/trunk/src/org/apache/xerces/impl/xpath/regex: ParserForXMLSchema.java Token.java

Author: mrglavas
Date: Tue Nov 11 20:15:46 2014
New Revision: 1638344

URL: http://svn.apache.org/r1638344
Log:
Fixing JIRA Issue #1651 (https://issues.apache.org/jira/browse/XERCESJ-1651): \w should be processed as [#x0000-#x10FFFF]-[\p{P}\p{Z}\p{C}], as per the XML Schema specification.

Modified:
    xerces/java/trunk/src/org/apache/xerces/impl/xpath/regex/ParserForXMLSchema.java
    xerces/java/trunk/src/org/apache/xerces/impl/xpath/regex/Token.java

Modified: xerces/java/trunk/src/org/apache/xerces/impl/xpath/regex/ParserForXMLSchema.java
URL: http://svn.apache.org/viewvc/xerces/java/trunk/src/org/apache/xerces/impl/xpath/regex/ParserForXMLSchema.java?rev=1638344&r1=1638343&r2=1638344&view=diff
==============================================================================
--- xerces/java/trunk/src/org/apache/xerces/impl/xpath/regex/ParserForXMLSchema.java (original)
+++ xerces/java/trunk/src/org/apache/xerces/impl/xpath/regex/ParserForXMLSchema.java Tue Nov 11 20:15:46 2014
@@ -384,11 +384,16 @@ class ParserForXMLSchema extends RegexPa
             ranges.put("xml:isDigit", tok);
             ranges2.put("xml:isDigit", Token.complementRanges(tok));
 
+            /*
+             * \w is defined by the XML Schema specification to be:
+             * [#x0000-#x10FFFF]-[\p{P}\p{Z}\p{C}] (all characters except the set of "punctuation", "separator" and "other" characters) 
+             */
             tok = Token.createRange();
-            setupRange(tok, LETTERS);
-            tok.mergeRanges((Token)ranges.get("xml:isDigit"));
-            ranges.put("xml:isWord", tok);
-            ranges2.put("xml:isWord", Token.complementRanges(tok));
+            tok.mergeRanges(Token.getRange("P", true));
+            tok.mergeRanges(Token.getRange("Z", true));
+            tok.mergeRanges(Token.getRange("C", true));
+            ranges2.put("xml:isWord", tok);
+            ranges.put("xml:isWord", Token.complementRanges(tok));
 
             tok = Token.createRange();
             setupRange(tok, NAMECHARS);

Modified: xerces/java/trunk/src/org/apache/xerces/impl/xpath/regex/Token.java
URL: http://svn.apache.org/viewvc/xerces/java/trunk/src/org/apache/xerces/impl/xpath/regex/Token.java?rev=1638344&r1=1638343&r2=1638344&view=diff
==============================================================================
--- xerces/java/trunk/src/org/apache/xerces/impl/xpath/regex/Token.java (original)
+++ xerces/java/trunk/src/org/apache/xerces/impl/xpath/regex/Token.java Tue Nov 11 20:15:46 2014
@@ -803,6 +803,7 @@ class Token implements java.io.Serializa
                     ranges[type].addRange(i, i);
                 } // for all characters
                 ranges[Character.UNASSIGNED].addRange(0x10000, Token.UTF16_MAX);
+                ranges[CHAR_OTHER].addRange(0x10000, Token.UTF16_MAX);
 
                 for (int i = 0;  i < ranges.length;  i ++) {
                     if (Token.categoryNames[i] != null) {



---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@xerces.apache.org
For additional commands, e-mail: commits-help@xerces.apache.org