You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@xerces.apache.org by mr...@apache.org on 2014/11/11 21:15:46 UTC
svn commit: r1638344 - in /xerces/java/trunk/src/org/apache/xerces/impl/xpath/regex: ParserForXMLSchema.java Token.java
Author: mrglavas
Date: Tue Nov 11 20:15:46 2014
New Revision: 1638344
URL: http://svn.apache.org/r1638344
Log:
Fixing JIRA Issue #1651: https://issues.apache.org/jira/browse/XERCESJ-1651. \w should be processed as [#x0000-#x10FFFF]-[\p{P}\p{Z}\p{C}] as per the XML Schema specification.
Modified:
xerces/java/trunk/src/org/apache/xerces/impl/xpath/regex/ParserForXMLSchema.java
xerces/java/trunk/src/org/apache/xerces/impl/xpath/regex/Token.java
Modified: xerces/java/trunk/src/org/apache/xerces/impl/xpath/regex/ParserForXMLSchema.java
URL: http://svn.apache.org/viewvc/xerces/java/trunk/src/org/apache/xerces/impl/xpath/regex/ParserForXMLSchema.java?rev=1638344&r1=1638343&r2=1638344&view=diff
==============================================================================
--- xerces/java/trunk/src/org/apache/xerces/impl/xpath/regex/ParserForXMLSchema.java (original)
+++ xerces/java/trunk/src/org/apache/xerces/impl/xpath/regex/ParserForXMLSchema.java Tue Nov 11 20:15:46 2014
@@ -384,11 +384,16 @@ class ParserForXMLSchema extends RegexPa
ranges.put("xml:isDigit", tok);
ranges2.put("xml:isDigit", Token.complementRanges(tok));
+ /*
+ * \w is defined by the XML Schema specification to be:
+ * [#x0000-#x10FFFF]-[\p{P}\p{Z}\p{C}] (all characters except the set of "punctuation", "separator" and "other" characters)
+ */
tok = Token.createRange();
- setupRange(tok, LETTERS);
- tok.mergeRanges((Token)ranges.get("xml:isDigit"));
- ranges.put("xml:isWord", tok);
- ranges2.put("xml:isWord", Token.complementRanges(tok));
+ tok.mergeRanges(Token.getRange("P", true));
+ tok.mergeRanges(Token.getRange("Z", true));
+ tok.mergeRanges(Token.getRange("C", true));
+ ranges2.put("xml:isWord", tok);
+ ranges.put("xml:isWord", Token.complementRanges(tok));
tok = Token.createRange();
setupRange(tok, NAMECHARS);
Modified: xerces/java/trunk/src/org/apache/xerces/impl/xpath/regex/Token.java
URL: http://svn.apache.org/viewvc/xerces/java/trunk/src/org/apache/xerces/impl/xpath/regex/Token.java?rev=1638344&r1=1638343&r2=1638344&view=diff
==============================================================================
--- xerces/java/trunk/src/org/apache/xerces/impl/xpath/regex/Token.java (original)
+++ xerces/java/trunk/src/org/apache/xerces/impl/xpath/regex/Token.java Tue Nov 11 20:15:46 2014
@@ -803,6 +803,7 @@ class Token implements java.io.Serializa
ranges[type].addRange(i, i);
} // for all characters
ranges[Character.UNASSIGNED].addRange(0x10000, Token.UTF16_MAX);
+ ranges[CHAR_OTHER].addRange(0x10000, Token.UTF16_MAX);
for (int i = 0; i < ranges.length; i ++) {
if (Token.categoryNames[i] != null) {
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@xerces.apache.org
For additional commands, e-mail: commits-help@xerces.apache.org