You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@xerces.apache.org by mr...@apache.org on 2004/01/26 20:37:35 UTC
cvs commit: xml-xerces/java/src/org/apache/xerces/impl XML11DTDScannerImpl.java XMLScanner.java XML11DocumentScannerImpl.java
mrglavas 2004/01/26 11:37:35
Modified: java/src/org/apache/xerces/impl XML11DTDScannerImpl.java
XMLScanner.java XML11DocumentScannerImpl.java
Log:
Improve performance of attribute value normalization for XML 1.0
documents.
Check if each character is < 0x20 instead of calling XMLChar.isSpace.
This simple check against 0x20 is safe because at the point we
normalize the attribute value we've determined that it is well formed,
and therefore if we're looking at anything less than 0x20 it must be
TAB, LF or CR. This would avoid a method call and table lookup and
allow us to skip over 0x20 which doesn't require further normalization.
Because this doesn't work for XML 1.1 docs, where control chars are
allowed, add the old method to the 1.1 scanners.
Revision Changes Path
1.10 +15 -1 xml-xerces/java/src/org/apache/xerces/impl/XML11DTDScannerImpl.java
Index: XML11DTDScannerImpl.java
===================================================================
RCS file: /home/cvs/xml-xerces/java/src/org/apache/xerces/impl/XML11DTDScannerImpl.java,v
retrieving revision 1.9
retrieving revision 1.10
diff -u -r1.9 -r1.10
--- XML11DTDScannerImpl.java 5 Jan 2004 21:54:05 -0000 1.9
+++ XML11DTDScannerImpl.java 26 Jan 2004 19:37:35 -0000 1.10
@@ -195,6 +195,20 @@
}
return dataok;
}
+
+ /**
+ * Normalize whitespace in an XMLString converting all whitespace
+ * characters to space characters.
+ */
+ protected void normalizeWhitespace(XMLString value) {
+ int end = value.offset + value.length;
+ for (int i = value.offset; i < end; i++) {
+ int c = value.ch[i];
+ if (XMLChar.isSpace(c)) {
+ value.ch[i] = ' ';
+ }
+ }
+ }
// returns true if the given character is not
// valid with respect to the version of
1.44 +8 -2 xml-xerces/java/src/org/apache/xerces/impl/XMLScanner.java
Index: XMLScanner.java
===================================================================
RCS file: /home/cvs/xml-xerces/java/src/org/apache/xerces/impl/XMLScanner.java,v
retrieving revision 1.43
retrieving revision 1.44
diff -u -r1.43 -r1.44
--- XMLScanner.java 26 Jan 2004 17:28:12 -0000 1.43
+++ XMLScanner.java 26 Jan 2004 19:37:35 -0000 1.44
@@ -1143,7 +1143,13 @@
int end = value.offset + value.length;
for (int i = value.offset; i < end; i++) {
int c = value.ch[i];
- if (XMLChar.isSpace(c)) {
+ // Performance: For XML 1.0 documents take advantage of
+ // the fact that the only legal characters below 0x20
+ // are 0x09 (TAB), 0x0A (LF) and 0x0D (CR). Since we've
+ // already determined the well-formedness of these
+ // characters it is sufficient (and safe) to check
+ // against 0x20. -- mrglavas
+ if (c < 0x20) {
value.ch[i] = ' ';
}
}
1.16 +15 -1 xml-xerces/java/src/org/apache/xerces/impl/XML11DocumentScannerImpl.java
Index: XML11DocumentScannerImpl.java
===================================================================
RCS file: /home/cvs/xml-xerces/java/src/org/apache/xerces/impl/XML11DocumentScannerImpl.java,v
retrieving revision 1.15
retrieving revision 1.16
diff -u -r1.15 -r1.16
--- XML11DocumentScannerImpl.java 5 Jan 2004 21:54:05 -0000 1.15
+++ XML11DocumentScannerImpl.java 26 Jan 2004 19:37:35 -0000 1.16
@@ -475,6 +475,20 @@
}
return dataok;
}
+
+ /**
+ * Normalize whitespace in an XMLString converting all whitespace
+ * characters to space characters.
+ */
+ protected void normalizeWhitespace(XMLString value) {
+ int end = value.offset + value.length;
+ for (int i = value.offset; i < end; i++) {
+ int c = value.ch[i];
+ if (XMLChar.isSpace(c)) {
+ value.ch[i] = ' ';
+ }
+ }
+ }
// returns true if the given character is not
// valid with respect to the version of
---------------------------------------------------------------------
To unsubscribe, e-mail: xerces-cvs-unsubscribe@xml.apache.org
For additional commands, e-mail: xerces-cvs-help@xml.apache.org