You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@xerces.apache.org by mr...@apache.org on 2010/10/18 06:37:41 UTC
svn commit: r1023648 - in /xerces/java/branches/xml-schema-1.1-dev/src/org/apache/xerces/impl/xs: XMLAssertPsychopathImpl.java opti/SchemaDOMParser.java

Author: mrglavas
Date: Mon Oct 18 04:37:41 2010
New Revision: 1023648

URL: http://svn.apache.org/viewvc?rev=1023648&view=rev
Log:
Fixing subtle bugs with string tokenization. The regex "\\s+" matches a couple of characters which are not XML white space (vertical tab 0xB and form feed 0xC). Use a StringTokenizer instead, with the four XML white space characters (0x9, 0xA, 0xD, 0x20) as delimiters. This should be slightly faster too, since the split() method ends up compiling a Pattern every time you call it.

Modified:
    xerces/java/branches/xml-schema-1.1-dev/src/org/apache/xerces/impl/xs/XMLAssertPsychopathImpl.java
    xerces/java/branches/xml-schema-1.1-dev/src/org/apache/xerces/impl/xs/opti/SchemaDOMParser.java

Modified: xerces/java/branches/xml-schema-1.1-dev/src/org/apache/xerces/impl/xs/XMLAssertPsychopathImpl.java
URL: http://svn.apache.org/viewvc/xerces/java/branches/xml-schema-1.1-dev/src/org/apache/xerces/impl/xs/XMLAssertPsychopathImpl.java?rev=1023648&r1=1023647&r2=1023648&view=diff
==============================================================================
--- xerces/java/branches/xml-schema-1.1-dev/src/org/apache/xerces/impl/xs/XMLAssertPsychopathImpl.java (original)
+++ xerces/java/branches/xml-schema-1.1-dev/src/org/apache/xerces/impl/xs/XMLAssertPsychopathImpl.java Mon Oct 18 04:37:41 2010
@@ -22,6 +22,7 @@ import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
 import java.util.Stack;
+import java.util.StringTokenizer;
 import java.util.Vector;
 
 import org.apache.xerces.dom.CoreDocumentImpl;
@@ -384,14 +385,15 @@ public class XMLAssertPsychopathImpl ext
                     // of a simpleType -> list.
                     // tokenize the list value by the longest sequence of
                     // white-spaces.
-                    String[] values = value.split("\\s+");
+                    StringTokenizer values = new StringTokenizer(value, " \n\t\r");
                     
                     // evaluate assertion on all of list items
-                    for (int valIdx = 0; valIdx < values.length; valIdx++) {
-                        setValueOf$valueForAListItem(attrType, values[valIdx]);                        
+                    while (values.hasMoreTokens()) {
+                        String itemValue = values.nextToken();
+                        setValueOf$valueForAListItem(attrType, itemValue);                        
                         AssertionError assertError = evaluateAssertion(element, 
                                                                     assertImpl, 
-                                                                values[valIdx], 
+                                                                    itemValue, 
                                                             xpathContextExists,
                                                                         true);
                         if (assertError != null) {
@@ -465,17 +467,18 @@ public class XMLAssertPsychopathImpl ext
                // evaluating assertions for simpleType -> list.                    
                // tokenize the list value by the longest sequence of
                // white-spaces.
-               String[] values = value.split("\\s+");               
+               StringTokenizer values = new StringTokenizer(value, " \n\t\r");
                
                // evaluate assertion on all of list items
-               for (int valIdx = 0; valIdx < values.length; valIdx++) {
-                  setValueOf$valueForAListItem(itemType, values[valIdx]);
-                  AssertionError assertError = evaluateAssertion(element, 
-                                                 assertImpl, values[valIdx], 
-                                                 false, true);
-                  if (assertError != null) {
-                      reportAssertionsError(assertError);    
-                  }
+               while (values.hasMoreTokens()) {
+                   String itemValue = values.nextToken();
+                   setValueOf$valueForAListItem(itemType, itemValue);
+                   AssertionError assertError = evaluateAssertion(element, 
+                           assertImpl, itemValue, 
+                           false, true);
+                   if (assertError != null) {
+                       reportAssertionsError(assertError);    
+                   }
                }
             }
             else if (memberTypes != null && memberTypes.getLength() == 0) {
@@ -797,7 +800,7 @@ public class XMLAssertPsychopathImpl ext
                                                                  getItemType();
             // split the "string value" of list contents, into non white-space
             // tokens.
-            String[] values = value.split("\\s+");
+            StringTokenizer values = new StringTokenizer(value, " \n\t\r");
             
             // construct a list of atomic XDM items, to assign to XPath2
             // context variable $value.
@@ -805,23 +808,25 @@ public class XMLAssertPsychopathImpl ext
             if ((listItemType.getMemberTypes()).getLength() > 0) {
                // itemType of xs:list has variety 'union'. here list items may
                // have different types, which are determined below.
-               for (int valIdx = 0; valIdx < values.length; valIdx++) {
+               while (values.hasMoreTokens()) {
+                   String itemValue = values.nextToken();
                    XSSimpleTypeDefinition listItemTypeForUnion = 
                                          getActualListItemTypeForVarietyUnion
                                                (listItemType.getMemberTypes(), 
-                                                              values[valIdx]);
+                                                       itemValue);
                    xdmItemList.add(SchemaTypeValueFactory.newSchemaTypeValue
                                                           (listItemTypeForUnion.
-                                              getBuiltInKind(),values[valIdx]));
+                                              getBuiltInKind(), itemValue));
                }                                  
             }
             else {
                // every list item has a same type (the itemType of
                // xs:list).
-               for (int valIdx = 0; valIdx < values.length; valIdx++) {
+               while (values.hasMoreTokens()) {
+                   String itemValue = values.nextToken();
                    xdmItemList.add(SchemaTypeValueFactory.newSchemaTypeValue
                                                (listItemType.getBuiltInKind(), 
-                                                             values[valIdx])); 
+                                                       itemValue)); 
                }                                  
             }
 

Modified: xerces/java/branches/xml-schema-1.1-dev/src/org/apache/xerces/impl/xs/opti/SchemaDOMParser.java
URL: http://svn.apache.org/viewvc/xerces/java/branches/xml-schema-1.1-dev/src/org/apache/xerces/impl/xs/opti/SchemaDOMParser.java?rev=1023648&r1=1023647&r2=1023648&view=diff
==============================================================================
--- xerces/java/branches/xml-schema-1.1-dev/src/org/apache/xerces/impl/xs/opti/SchemaDOMParser.java (original)
+++ xerces/java/branches/xml-schema-1.1-dev/src/org/apache/xerces/impl/xs/opti/SchemaDOMParser.java Mon Oct 18 04:37:41 2010
@@ -19,6 +19,7 @@ package org.apache.xerces.impl.xs.opti;
 
 import java.util.Iterator;
 import java.util.List;
+import java.util.StringTokenizer;
 import java.io.IOException;
 import java.math.BigDecimal;
 import java.util.ArrayList;
@@ -658,19 +659,17 @@ public class SchemaDOMParser extends Def
                                 nfe.getMessage()}, XMLErrorReporter.SEVERITY_ERROR);
                     }
                 }
-                // '\\s+' is regex denoting longest sequence of consecutive
-                // white-space characters.
                 else if (SchemaSymbols.ATT_TYPEAVAILABLE.equals(attrLocalName)) {
-                   typeAvailableList = tokenizeString(attrValue, "\\s+");
+                   typeAvailableList = tokenizeString(attrValue);
                 }
                 else if (SchemaSymbols.ATT_TYPEUNAVAILABLE.equals(attrLocalName)) {
-                    typeUnavailableList = tokenizeString(attrValue, "\\s+");
+                    typeUnavailableList = tokenizeString(attrValue);
                 }
                 else if (SchemaSymbols.ATT_FACETAVAILABLE.equals(attrLocalName)) {
-                    facetAvailableList = tokenizeString(attrValue, "\\s+");
+                    facetAvailableList = tokenizeString(attrValue);
                 }
                 else if (SchemaSymbols.ATT_FACETUNAVAILABLE.equals(attrLocalName)) {
-                    facetUnavailableList = tokenizeString(attrValue, "\\s+");
+                    facetUnavailableList = tokenizeString(attrValue);
                 }
                 else {
                     // report a warning
@@ -852,20 +851,16 @@ public class SchemaDOMParser extends Def
 
     
     /*
-     * Method to tokenize a string value given a tokenizing delimiter, and
-     * return a List containing the string tokens. 
+     * Method to tokenize a string value (with XML white spaces as the 
+     * delimiter) and return a List containing the string tokens. 
      */
-    private List tokenizeString(String strValue, String delim) {
-        List stringTokens = new ArrayList();
-        
-        String[] strSplitValue = strValue.split(delim);
-        
-        for (int strIdx = 0; strIdx < strSplitValue.length; strIdx++) {
-           stringTokens.add(strSplitValue[strIdx]);
+    private List tokenizeString(String strValue) {
+        StringTokenizer st = new StringTokenizer(strValue, " \n\t\r");
+        List stringTokens = new ArrayList(st.countTokens());
+        while (st.hasMoreTokens()) {
+           stringTokens.add(st.nextToken());
         }
-        
         return stringTokens;
-        
     } // tokenizeString
     
 



---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@xerces.apache.org
For additional commands, e-mail: commits-help@xerces.apache.org