You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@xerces.apache.org by mr...@apache.org on 2012/08/21 17:36:30 UTC

svn commit: r1375610 - /xerces/java/trunk/src/org/apache/xerces/impl/dv/xs/TypeValidator.java

Author: mrglavas
Date: Tue Aug 21 15:36:30 2012
New Revision: 1375610

URL: http://svn.apache.org/viewvc?rev=1375610&view=rev
Log:
Introducing a system property for controlling how string length is computed by the schema validator. When org.apache.xerces.impl.dv.xs.useCodePointCountForStringLength=true, the length of an xs:string or xs:anyURI value is calculated by counting the number of Unicode code points in the string. The value of the system property is false by default, preserving the long-standing behaviour of computing length in Java chars (i.e. String.length()).

Modified:
    xerces/java/trunk/src/org/apache/xerces/impl/dv/xs/TypeValidator.java

Modified: xerces/java/trunk/src/org/apache/xerces/impl/dv/xs/TypeValidator.java
URL: http://svn.apache.org/viewvc/xerces/java/trunk/src/org/apache/xerces/impl/dv/xs/TypeValidator.java?rev=1375610&r1=1375609&r2=1375610&view=diff
==============================================================================
--- xerces/java/trunk/src/org/apache/xerces/impl/dv/xs/TypeValidator.java (original)
+++ xerces/java/trunk/src/org/apache/xerces/impl/dv/xs/TypeValidator.java Tue Aug 21 15:36:30 2012
@@ -17,8 +17,12 @@
 
 package org.apache.xerces.impl.dv.xs;
 
+import java.security.AccessController;
+import java.security.PrivilegedAction;
+
 import org.apache.xerces.impl.dv.InvalidDatatypeValueException;
 import org.apache.xerces.impl.dv.ValidationContext;
+import org.apache.xerces.util.XMLChar;
 
 /**
  * All primitive types plus ID/IDREF/ENTITY/INTEGER are derived from this abstract
@@ -34,6 +38,15 @@ import org.apache.xerces.impl.dv.Validat
  * @version $Id$
  */
 public abstract class TypeValidator {
+    
+    private static final boolean USE_CODE_POINT_COUNT_FOR_STRING_LENGTH = AccessController.doPrivileged(new PrivilegedAction() {
+        public Object run() {
+            try {
+                return Boolean.getBoolean("org.apache.xerces.impl.dv.xs.useCodePointCountForStringLength") ? Boolean.TRUE : Boolean.FALSE;
+            }
+            catch (SecurityException ex) {}
+            return Boolean.FALSE;
+        }}) == Boolean.TRUE;
 
     // which facets are allowed for this type
     public abstract short getAllowedFacets();
@@ -79,7 +92,14 @@ public abstract class TypeValidator {
     // get the length of the value
     // the parameters are in compiled form (from getActualValue)
     public int getDataLength(Object value) {
-        return (value instanceof String) ? ((String)value).length() : -1;
+        if (value instanceof String) {
+            final String str = (String)value;
+            if (!USE_CODE_POINT_COUNT_FOR_STRING_LENGTH) {
+                return str.length();
+            }
+            return getCodePointLength(str);
+        }
+        return -1;
     }
 
     // get the number of digits of the value
@@ -93,6 +113,25 @@ public abstract class TypeValidator {
     public int getFractionDigits(Object value) {
         return -1;
     }
+    
+    // Returns the length of the string in Unicode code points.
+    private int getCodePointLength(String value) {
+        // Count the number of surrogate pairs, and subtract them from
+        // the total length.
+        final int len = value.length();
+        int surrogatePairCount = 0;
+        for (int i = 0; i < len - 1; ++i) {
+            if (XMLChar.isHighSurrogate(value.charAt(i))) {
+                if (XMLChar.isLowSurrogate(value.charAt(++i))) {
+                    ++surrogatePairCount;
+                }
+                else {
+                    --i;
+                }
+            }
+        }
+        return len - surrogatePairCount;
+    }
 
     // check whether the character is in the range 0x30 ~ 0x39
     public static final boolean isDigit(char ch) {



---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@xerces.apache.org
For additional commands, e-mail: commits-help@xerces.apache.org