You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@xerces.apache.org by mr...@apache.org on 2014/11/11 21:22:21 UTC

svn commit: r1638353 - in /xerces/java/branches/xml-schema-1.1-dev/src/org/apache/xerces/impl/xpath/regex: Token.java XML11TokenMap.java XMLTokenMap.java

Author: mrglavas
Date: Tue Nov 11 20:22:20 2014
New Revision: 1638353

URL: http://svn.apache.org/r1638353
Log:
Fixing JIRA Issue #1651 (https://issues.apache.org/jira/browse/XERCESJ-1651): \w should be processed as [#x0000-#x10FFFF]-[\p{P}\p{Z}\p{C}], as per the XML Schema specification.

Modified:
    xerces/java/branches/xml-schema-1.1-dev/src/org/apache/xerces/impl/xpath/regex/Token.java
    xerces/java/branches/xml-schema-1.1-dev/src/org/apache/xerces/impl/xpath/regex/XML11TokenMap.java
    xerces/java/branches/xml-schema-1.1-dev/src/org/apache/xerces/impl/xpath/regex/XMLTokenMap.java

Modified: xerces/java/branches/xml-schema-1.1-dev/src/org/apache/xerces/impl/xpath/regex/Token.java
URL: http://svn.apache.org/viewvc/xerces/java/branches/xml-schema-1.1-dev/src/org/apache/xerces/impl/xpath/regex/Token.java?rev=1638353&r1=1638352&r2=1638353&view=diff
==============================================================================
--- xerces/java/branches/xml-schema-1.1-dev/src/org/apache/xerces/impl/xpath/regex/Token.java (original)
+++ xerces/java/branches/xml-schema-1.1-dev/src/org/apache/xerces/impl/xpath/regex/Token.java Tue Nov 11 20:22:20 2014
@@ -807,6 +807,7 @@ class Token implements java.io.Serializa
                     ranges[type].addRange(i, i);
                 } // for all characters
                 ranges[Character.UNASSIGNED].addRange(0x10000, Token.UTF16_MAX);
+                ranges[CHAR_OTHER].addRange(0x10000, Token.UTF16_MAX);
 
                 for (int i = 0;  i < ranges.length;  i ++) {
                     if (Token.categoryNames[i] != null) {

Modified: xerces/java/branches/xml-schema-1.1-dev/src/org/apache/xerces/impl/xpath/regex/XML11TokenMap.java
URL: http://svn.apache.org/viewvc/xerces/java/branches/xml-schema-1.1-dev/src/org/apache/xerces/impl/xpath/regex/XML11TokenMap.java?rev=1638353&r1=1638352&r2=1638353&view=diff
==============================================================================
--- xerces/java/branches/xml-schema-1.1-dev/src/org/apache/xerces/impl/xpath/regex/XML11TokenMap.java (original)
+++ xerces/java/branches/xml-schema-1.1-dev/src/org/apache/xerces/impl/xpath/regex/XML11TokenMap.java Tue Nov 11 20:22:20 2014
@@ -51,11 +51,16 @@ final class XML11TokenMap implements Ran
         ranges.put("xml:isDigit", tok);
         ranges2.put("xml:isDigit", Token.complementRanges(tok));
 
+        /*
+         * \w is defined by the XML Schema specification to be:
+         * [#x0000-#x10FFFF]-[\p{P}\p{Z}\p{C}] (all characters except the set of "punctuation", "separator" and "other" characters) 
+         */
         tok = Token.createRange();
-        REUtil.setupRange(tok, REConstants.LETTERS);
-        tok.mergeRanges((Token)ranges.get("xml:isDigit"));
-        ranges.put("xml:isWord", tok);
-        ranges2.put("xml:isWord", Token.complementRanges(tok));
+        tok.mergeRanges(Token.getRange("P", true));
+        tok.mergeRanges(Token.getRange("Z", true));
+        tok.mergeRanges(Token.getRange("C", true));
+        ranges2.put("xml:isWord", tok);
+        ranges.put("xml:isWord", Token.complementRanges(tok));
 
         tok = Token.createRange();
         REUtil.setupRange(tok, REConstants.NAMECHARS11_INTS);

Modified: xerces/java/branches/xml-schema-1.1-dev/src/org/apache/xerces/impl/xpath/regex/XMLTokenMap.java
URL: http://svn.apache.org/viewvc/xerces/java/branches/xml-schema-1.1-dev/src/org/apache/xerces/impl/xpath/regex/XMLTokenMap.java?rev=1638353&r1=1638352&r2=1638353&view=diff
==============================================================================
--- xerces/java/branches/xml-schema-1.1-dev/src/org/apache/xerces/impl/xpath/regex/XMLTokenMap.java (original)
+++ xerces/java/branches/xml-schema-1.1-dev/src/org/apache/xerces/impl/xpath/regex/XMLTokenMap.java Tue Nov 11 20:22:20 2014
@@ -51,11 +51,16 @@ final class XMLTokenMap implements Range
         ranges.put("xml:isDigit", tok);
         ranges2.put("xml:isDigit", Token.complementRanges(tok));
 
+        /*
+         * \w is defined by the XML Schema specification to be:
+         * [#x0000-#x10FFFF]-[\p{P}\p{Z}\p{C}] (all characters except the set of "punctuation", "separator" and "other" characters) 
+         */
         tok = Token.createRange();
-        REUtil.setupRange(tok, REConstants.LETTERS);
-        tok.mergeRanges((Token)ranges.get("xml:isDigit"));
-        ranges.put("xml:isWord", tok);
-        ranges2.put("xml:isWord", Token.complementRanges(tok));
+        tok.mergeRanges(Token.getRange("P", true));
+        tok.mergeRanges(Token.getRange("Z", true));
+        tok.mergeRanges(Token.getRange("C", true));
+        ranges2.put("xml:isWord", tok);
+        ranges.put("xml:isWord", Token.complementRanges(tok));
 
         tok = Token.createRange();
         REUtil.setupRange(tok, REConstants.NAMECHARS);



---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@xerces.apache.org
For additional commands, e-mail: commits-help@xerces.apache.org