You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@xerces.apache.org by mr...@apache.org on 2010/10/18 06:37:41 UTC
svn commit: r1023648 - in
/xerces/java/branches/xml-schema-1.1-dev/src/org/apache/xerces/impl/xs:
XMLAssertPsychopathImpl.java opti/SchemaDOMParser.java
Author: mrglavas
Date: Mon Oct 18 04:37:41 2010
New Revision: 1023648
URL: http://svn.apache.org/viewvc?rev=1023648&view=rev
Log:
Fixing subtle bugs with string tokenization. The regex "\\s+" matches a couple of characters which are not XML white space (vertical tab 0xB and form feed 0xC). Use a StringTokenizer instead, with the four XML white space characters (0x9, 0xA, 0xD, 0x20) as delimiters. This should be slightly faster too, since the split() method ends up compiling a Pattern every time it is called.
Modified:
xerces/java/branches/xml-schema-1.1-dev/src/org/apache/xerces/impl/xs/XMLAssertPsychopathImpl.java
xerces/java/branches/xml-schema-1.1-dev/src/org/apache/xerces/impl/xs/opti/SchemaDOMParser.java
Modified: xerces/java/branches/xml-schema-1.1-dev/src/org/apache/xerces/impl/xs/XMLAssertPsychopathImpl.java
URL: http://svn.apache.org/viewvc/xerces/java/branches/xml-schema-1.1-dev/src/org/apache/xerces/impl/xs/XMLAssertPsychopathImpl.java?rev=1023648&r1=1023647&r2=1023648&view=diff
==============================================================================
--- xerces/java/branches/xml-schema-1.1-dev/src/org/apache/xerces/impl/xs/XMLAssertPsychopathImpl.java (original)
+++ xerces/java/branches/xml-schema-1.1-dev/src/org/apache/xerces/impl/xs/XMLAssertPsychopathImpl.java Mon Oct 18 04:37:41 2010
@@ -22,6 +22,7 @@ import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Stack;
+import java.util.StringTokenizer;
import java.util.Vector;
import org.apache.xerces.dom.CoreDocumentImpl;
@@ -384,14 +385,15 @@ public class XMLAssertPsychopathImpl ext
// of a simpleType -> list.
// tokenize the list value by the longest sequence of
// white-spaces.
- String[] values = value.split("\\s+");
+ StringTokenizer values = new StringTokenizer(value, " \n\t\r");
// evaluate assertion on all of list items
- for (int valIdx = 0; valIdx < values.length; valIdx++) {
- setValueOf$valueForAListItem(attrType, values[valIdx]);
+ while (values.hasMoreTokens()) {
+ String itemValue = values.nextToken();
+ setValueOf$valueForAListItem(attrType, itemValue);
AssertionError assertError = evaluateAssertion(element,
assertImpl,
- values[valIdx],
+ itemValue,
xpathContextExists,
true);
if (assertError != null) {
@@ -465,17 +467,18 @@ public class XMLAssertPsychopathImpl ext
// evaluating assertions for simpleType -> list.
// tokenize the list value by the longest sequence of
// white-spaces.
- String[] values = value.split("\\s+");
+ StringTokenizer values = new StringTokenizer(value, " \n\t\r");
// evaluate assertion on all of list items
- for (int valIdx = 0; valIdx < values.length; valIdx++) {
- setValueOf$valueForAListItem(itemType, values[valIdx]);
- AssertionError assertError = evaluateAssertion(element,
- assertImpl, values[valIdx],
- false, true);
- if (assertError != null) {
- reportAssertionsError(assertError);
- }
+ while (values.hasMoreTokens()) {
+ String itemValue = values.nextToken();
+ setValueOf$valueForAListItem(itemType, itemValue);
+ AssertionError assertError = evaluateAssertion(element,
+ assertImpl, itemValue,
+ false, true);
+ if (assertError != null) {
+ reportAssertionsError(assertError);
+ }
}
}
else if (memberTypes != null && memberTypes.getLength() == 0) {
@@ -797,7 +800,7 @@ public class XMLAssertPsychopathImpl ext
getItemType();
// split the "string value" of list contents, into non white-space
// tokens.
- String[] values = value.split("\\s+");
+ StringTokenizer values = new StringTokenizer(value, " \n\t\r");
// construct a list of atomic XDM items, to assign to XPath2
// context variable $value.
@@ -805,23 +808,25 @@ public class XMLAssertPsychopathImpl ext
if ((listItemType.getMemberTypes()).getLength() > 0) {
// itemType of xs:list has variety 'union'. here list items may
// have different types, which are determined below.
- for (int valIdx = 0; valIdx < values.length; valIdx++) {
+ while (values.hasMoreTokens()) {
+ String itemValue = values.nextToken();
XSSimpleTypeDefinition listItemTypeForUnion =
getActualListItemTypeForVarietyUnion
(listItemType.getMemberTypes(),
- values[valIdx]);
+ itemValue);
xdmItemList.add(SchemaTypeValueFactory.newSchemaTypeValue
(listItemTypeForUnion.
- getBuiltInKind(),values[valIdx]));
+ getBuiltInKind(), itemValue));
}
}
else {
// every list item has a same type (the itemType of
// xs:list).
- for (int valIdx = 0; valIdx < values.length; valIdx++) {
+ while (values.hasMoreTokens()) {
+ String itemValue = values.nextToken();
xdmItemList.add(SchemaTypeValueFactory.newSchemaTypeValue
(listItemType.getBuiltInKind(),
- values[valIdx]));
+ itemValue));
}
}
Modified: xerces/java/branches/xml-schema-1.1-dev/src/org/apache/xerces/impl/xs/opti/SchemaDOMParser.java
URL: http://svn.apache.org/viewvc/xerces/java/branches/xml-schema-1.1-dev/src/org/apache/xerces/impl/xs/opti/SchemaDOMParser.java?rev=1023648&r1=1023647&r2=1023648&view=diff
==============================================================================
--- xerces/java/branches/xml-schema-1.1-dev/src/org/apache/xerces/impl/xs/opti/SchemaDOMParser.java (original)
+++ xerces/java/branches/xml-schema-1.1-dev/src/org/apache/xerces/impl/xs/opti/SchemaDOMParser.java Mon Oct 18 04:37:41 2010
@@ -19,6 +19,7 @@ package org.apache.xerces.impl.xs.opti;
import java.util.Iterator;
import java.util.List;
+import java.util.StringTokenizer;
import java.io.IOException;
import java.math.BigDecimal;
import java.util.ArrayList;
@@ -658,19 +659,17 @@ public class SchemaDOMParser extends Def
nfe.getMessage()}, XMLErrorReporter.SEVERITY_ERROR);
}
}
- // '\\s+' is regex denoting longest sequence of consecutive
- // white-space characters.
else if (SchemaSymbols.ATT_TYPEAVAILABLE.equals(attrLocalName)) {
- typeAvailableList = tokenizeString(attrValue, "\\s+");
+ typeAvailableList = tokenizeString(attrValue);
}
else if (SchemaSymbols.ATT_TYPEUNAVAILABLE.equals(attrLocalName)) {
- typeUnavailableList = tokenizeString(attrValue, "\\s+");
+ typeUnavailableList = tokenizeString(attrValue);
}
else if (SchemaSymbols.ATT_FACETAVAILABLE.equals(attrLocalName)) {
- facetAvailableList = tokenizeString(attrValue, "\\s+");
+ facetAvailableList = tokenizeString(attrValue);
}
else if (SchemaSymbols.ATT_FACETUNAVAILABLE.equals(attrLocalName)) {
- facetUnavailableList = tokenizeString(attrValue, "\\s+");
+ facetUnavailableList = tokenizeString(attrValue);
}
else {
// report a warning
@@ -852,20 +851,16 @@ public class SchemaDOMParser extends Def
/*
- * Method to tokenize a string value given a tokenizing delimiter, and
- * return a List containing the string tokens.
+ * Method to tokenize a string value (with XML white spaces as the
+ * delimiter) and return a List containing the string tokens.
*/
- private List tokenizeString(String strValue, String delim) {
- List stringTokens = new ArrayList();
-
- String[] strSplitValue = strValue.split(delim);
-
- for (int strIdx = 0; strIdx < strSplitValue.length; strIdx++) {
- stringTokens.add(strSplitValue[strIdx]);
+ private List tokenizeString(String strValue) {
+ StringTokenizer st = new StringTokenizer(strValue, " \n\t\r");
+ List stringTokens = new ArrayList(st.countTokens());
+ while (st.hasMoreTokens()) {
+ stringTokens.add(st.nextToken());
}
-
return stringTokens;
-
} // tokenizeString
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@xerces.apache.org
For additional commands, e-mail: commits-help@xerces.apache.org