You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@xerces.apache.org by mr...@apache.org on 2004/01/26 20:37:35 UTC

cvs commit: xml-xerces/java/src/org/apache/xerces/impl XML11DTDScannerImpl.java XMLScanner.java XML11DocumentScannerImpl.java

mrglavas    2004/01/26 11:37:35

  Modified:    java/src/org/apache/xerces/impl XML11DTDScannerImpl.java
                        XMLScanner.java XML11DocumentScannerImpl.java
  Log:
  Improve performance of attribute value normalization for XML 1.0
  documents.
  
  Check if each character is < 0x20 instead of calling XMLChar.isSpace.
  This simple check against 0x20 is safe because at the point we
  normalize the attribute value we've determined that it is well formed,
  and therefore if we're looking at anything less than 0x20 it must be
  TAB, LF or CR. This avoids a method call and table lookup and
  allows us to skip over 0x20, which doesn't require further normalization.
  
  This shortcut doesn't work for XML 1.1 documents, because control
  characters are allowed in them, so add the old method to the 1.1 scanners.
  
  Revision  Changes    Path
  1.10      +15 -1     xml-xerces/java/src/org/apache/xerces/impl/XML11DTDScannerImpl.java
  
  Index: XML11DTDScannerImpl.java
  ===================================================================
  RCS file: /home/cvs/xml-xerces/java/src/org/apache/xerces/impl/XML11DTDScannerImpl.java,v
  retrieving revision 1.9
  retrieving revision 1.10
  diff -u -r1.9 -r1.10
  --- XML11DTDScannerImpl.java	5 Jan 2004 21:54:05 -0000	1.9
  +++ XML11DTDScannerImpl.java	26 Jan 2004 19:37:35 -0000	1.10
  @@ -195,6 +195,20 @@
           }
           return dataok;
      }
  +   
  +    /**
  +     * Normalize whitespace in an XMLString converting all whitespace
  +     * characters to space characters.
  +     */
  +    protected void normalizeWhitespace(XMLString value) {
  +        int end = value.offset + value.length;
  +        for (int i = value.offset; i < end; i++) {
  +            int c = value.ch[i];
  +            if (XMLChar.isSpace(c)) {
  +                value.ch[i] = ' ';
  +            }
  +        }
  +    }
   
       // returns true if the given character is not
       // valid with respect to the version of
  
  
  
  1.44      +8 -2      xml-xerces/java/src/org/apache/xerces/impl/XMLScanner.java
  
  Index: XMLScanner.java
  ===================================================================
  RCS file: /home/cvs/xml-xerces/java/src/org/apache/xerces/impl/XMLScanner.java,v
  retrieving revision 1.43
  retrieving revision 1.44
  diff -u -r1.43 -r1.44
  --- XMLScanner.java	26 Jan 2004 17:28:12 -0000	1.43
  +++ XMLScanner.java	26 Jan 2004 19:37:35 -0000	1.44
  @@ -1143,7 +1143,13 @@
           int end = value.offset + value.length;
           for (int i = value.offset; i < end; i++) {
               int c = value.ch[i];
  -            if (XMLChar.isSpace(c)) {
  +            // Performance: For XML 1.0 documents take advantage of 
  +            // the fact that the only legal characters below 0x20 
  +            // are 0x09 (TAB), 0x0A (LF) and 0x0D (CR). Since we've 
  +            // already determined the well-formedness of these
  +            // characters it is sufficient (and safe) to check
  +            // against 0x20. -- mrglavas
  +            if (c < 0x20) {
                   value.ch[i] = ' ';
               }
           }
  
  
  
  1.16      +15 -1     xml-xerces/java/src/org/apache/xerces/impl/XML11DocumentScannerImpl.java
  
  Index: XML11DocumentScannerImpl.java
  ===================================================================
  RCS file: /home/cvs/xml-xerces/java/src/org/apache/xerces/impl/XML11DocumentScannerImpl.java,v
  retrieving revision 1.15
  retrieving revision 1.16
  diff -u -r1.15 -r1.16
  --- XML11DocumentScannerImpl.java	5 Jan 2004 21:54:05 -0000	1.15
  +++ XML11DocumentScannerImpl.java	26 Jan 2004 19:37:35 -0000	1.16
  @@ -475,6 +475,20 @@
           }
           return dataok;
      }
  +   
  +    /**
  +     * Normalize whitespace in an XMLString converting all whitespace
  +     * characters to space characters.
  +     */
  +    protected void normalizeWhitespace(XMLString value) {
  +        int end = value.offset + value.length;
  +	    for (int i = value.offset; i < end; i++) {
  +           int c = value.ch[i];
  +           if (XMLChar.isSpace(c)) {
  +               value.ch[i] = ' ';
  +           }
  +       }
  +    }
   
       // returns true if the given character is not
       // valid with respect to the version of
  
  
  

---------------------------------------------------------------------
To unsubscribe, e-mail: xerces-cvs-unsubscribe@xml.apache.org
For additional commands, e-mail: xerces-cvs-help@xml.apache.org