You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@xerces.apache.org by le...@locus.apache.org on 2000/10/31 02:38:20 UTC

cvs commit: xml-xerces/java/src/org/apache/xerces/impl XMLDTDScanner.java XMLDocumentScanner.java XMLScanner.java

lehors      00/10/30 17:38:19

  Modified:    java/src/org/apache/xerces/util Tag: xerces_j_2 XMLChar.java
               java/src/org/apache/xerces/impl Tag: xerces_j_2
                        XMLDTDScanner.java XMLDocumentScanner.java
                        XMLScanner.java
  Log:
  added support for supplemental characters and surrogates
  
  Revision  Changes    Path
  No                   revision
  
  
  No                   revision
  
  
  1.1.2.9   +48 -1     xml-xerces/java/src/org/apache/xerces/util/Attic/XMLChar.java
  
  Index: XMLChar.java
  ===================================================================
  RCS file: /home/cvs/xml-xerces/java/src/org/apache/xerces/util/Attic/XMLChar.java,v
  retrieving revision 1.1.2.8
  retrieving revision 1.1.2.9
  diff -u -r1.1.2.8 -r1.1.2.9
  --- XMLChar.java	2000/10/28 01:14:02	1.1.2.8
  +++ XMLChar.java	2000/10/31 01:38:15	1.1.2.9
  @@ -76,8 +76,9 @@
    * @author Stubs generated by DesignDoc on Wed Jun 07 11:58:44 PDT 2000
    * @author Andy Clark, IBM
    * @author Eric Ye, IBM
  + * @author Arnaud  Le Hors, IBM
    *
  - * @version $Id: XMLChar.java,v 1.1.2.8 2000/10/28 01:14:02 andyc Exp $
  + * @version $Id: XMLChar.java,v 1.1.2.9 2000/10/31 01:38:15 lehors Exp $
    */
   public class XMLChar {
   
  @@ -362,6 +363,52 @@
       //
       // Public static methods
       //
  +
  +    /**
  +     * Returns true if the specified character is a supplemental character.
  +     *
  +     * @param c The character to check.
  +     */
  +    public static boolean isSupplemental(int c) {
  +        return (c >= 0x10000 && c <= 0x10FFFF);
  +    }
  +
  +    /**
  +     * Returns the high surrogate of a supplemental character
  +     *
  +     * @param c The supplemental character to "split".
  +     */
  +    public static char highSurrogate(int c) {
  +        return (char) (((c - 0x00010000) >> 10) + 0xd800);
  +    }
  +
  +    /**
  +     * Returns the low surrogate of a supplemental character
  +     *
  +     * @param c The supplemental character to "split".
  +     */
  +    public static char lowSurrogate(int c) {
  +        return (char) (((c - 0x00010000) & 0x3ff) + 0xdc00);
  +    }
  +
  +    /**
  +     * Returns whether the given character is a high surrogate
  +     *
  +     * @param c The character to check.
  +     */
  +    public static boolean isHighSurrogate(int c) {
  +        return (0xd800 <= c && c <= 0xdbff);
  +    }
  +
  +    /**
  +     * Returns whether the given character is a low surrogate
  +     *
  +     * @param c The character to check.
  +     */
  +    public static boolean isLowSurrogate(int c) {
  +        return (0xdc00 <= c && c <= 0xdfff);
  +    }
  +
   
       /**
        * Returns true if the specified character is valid. This method
  
  
  
  No                   revision
  
  
  No                   revision
  
  
  1.1.2.56  +6 -4      xml-xerces/java/src/org/apache/xerces/impl/Attic/XMLDTDScanner.java
  
  Index: XMLDTDScanner.java
  ===================================================================
  RCS file: /home/cvs/xml-xerces/java/src/org/apache/xerces/impl/Attic/XMLDTDScanner.java,v
  retrieving revision 1.1.2.55
  retrieving revision 1.1.2.56
  diff -u -r1.1.2.55 -r1.1.2.56
  --- XMLDTDScanner.java	2000/10/30 22:26:43	1.1.2.55
  +++ XMLDTDScanner.java	2000/10/31 01:38:17	1.1.2.56
  @@ -87,7 +87,7 @@
    * @author Andy Clark, IBM
    * @author Glenn Marcy, IBM
    *
  - * @version $Id: XMLDTDScanner.java,v 1.1.2.55 2000/10/30 22:26:43 lehors Exp $
  + * @version $Id: XMLDTDScanner.java,v 1.1.2.56 2000/10/31 01:38:17 lehors Exp $
    */
   public class XMLDTDScanner
       extends XMLScanner
  @@ -1674,8 +1674,7 @@
                   fStringBuffer2.append(fString);
                   if (fEntityScanner.skipChar('&')) {
                       if (fEntityScanner.skipChar('#')) {
  -                        char c = (char) scanCharReferenceValue();
  -                        fStringBuffer2.append(c);
  +                        scanCharReferenceValue(fStringBuffer2);
                       }
                       else {
                           fStringBuffer2.append('&');
  @@ -1708,7 +1707,10 @@
                   }
                   else {
                       int c = fEntityScanner.peekChar();
  -                    if (XMLChar.isInvalid(c)) {
  +                    if (XMLChar.isHighSurrogate(c)) {
  +                        scanSurrogates(fStringBuffer2);
  +                    }
  +                    else if (XMLChar.isInvalid(c)) {
                           fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN,
                                                      "InvalidCharInLiteral",
                                                      new Object[] { Integer.toHexString(c) },
  
  
  
  1.1.2.54  +41 -23    xml-xerces/java/src/org/apache/xerces/impl/Attic/XMLDocumentScanner.java
  
  Index: XMLDocumentScanner.java
  ===================================================================
  RCS file: /home/cvs/xml-xerces/java/src/org/apache/xerces/impl/Attic/XMLDocumentScanner.java,v
  retrieving revision 1.1.2.53
  retrieving revision 1.1.2.54
  diff -u -r1.1.2.53 -r1.1.2.54
  --- XMLDocumentScanner.java	2000/10/28 01:13:59	1.1.2.53
  +++ XMLDocumentScanner.java	2000/10/31 01:38:17	1.1.2.54
  @@ -100,8 +100,9 @@
    * @author Glenn Marcy, IBM
    * @author Stubs generated by DesignDoc on Mon Sep 11 11:10:57 PDT 2000
    * @author Andy Clark, IBM
  + * @author Arnaud  Le Hors, IBM
    *
  - * @version $Id: XMLDocumentScanner.java,v 1.1.2.53 2000/10/28 01:13:59 andyc Exp $
  + * @version $Id: XMLDocumentScanner.java,v 1.1.2.54 2000/10/31 01:38:17 lehors Exp $
    */
   public class XMLDocumentScanner
       extends XMLScanner
  @@ -269,6 +270,9 @@
       /** Single character array. */
       private final char[] fSingleChar = new char[1];
   
  +    /** String buffer. */
  +    private XMLStringBuffer fStringBuffer2 = new XMLStringBuffer();
  +
       /** External entity. */
       private XMLEntityManager.ExternalEntity fExternalEntity = new XMLEntityManager.ExternalEntity();
   
  @@ -965,9 +969,9 @@
               if (DEBUG_ATTR_ENTITIES) {
                   System.out.println("*** set attribute offset: "+fAttributeOffset);
               }
  -            fStringBuffer.clear();
  +            fStringBuffer2.clear();
               do {
  -                fStringBuffer.append(fString);
  +                fStringBuffer2.append(fString);
                   fAttributeOffset += fString.length;
                   if (DEBUG_ATTR_ENTITIES) {
                       System.out.println("*** increment attribute offset: "+fAttributeOffset);
  @@ -975,9 +979,8 @@
                   if (c == '&') {
                       fEntityScanner.skipChar('&');
                       if (fEntityScanner.skipChar('#')) {
  -                        int cv = scanCharReferenceValue();
  -                        if (cv != -1) {
  -                            fStringBuffer.append((char)cv);
  +                        int ch = scanCharReferenceValue(fStringBuffer2);
  +                        if (ch != -1) {
                               fAttributeOffset++;
                               if (DEBUG_ATTR_ENTITIES) {
                                   System.out.println("*** increment attribute offset: "+fAttributeOffset);
  @@ -997,35 +1000,35 @@
                                                          null, XMLErrorReporter.SEVERITY_FATAL_ERROR);
                           }
                           if (entityName == fAmpSymbol) {
  -                            fStringBuffer.append('&');
  +                            fStringBuffer2.append('&');
                               fAttributeOffset++;
                               if (DEBUG_ATTR_ENTITIES) {
                                   System.out.println("*** increment attribute offset: "+fAttributeOffset);
                               }
                           }
                           else if (entityName == fAposSymbol) {
  -                            fStringBuffer.append('\'');
  +                            fStringBuffer2.append('\'');
                               fAttributeOffset++;
                               if (DEBUG_ATTR_ENTITIES) {
                                   System.out.println("*** increment attribute offset: "+fAttributeOffset);
                               }
                           }
                           else if (entityName == fLtSymbol) {
  -                            fStringBuffer.append('<');
  +                            fStringBuffer2.append('<');
                               fAttributeOffset++;
                               if (DEBUG_ATTR_ENTITIES) {
                                   System.out.println("*** increment attribute offset: "+fAttributeOffset);
                               }
                           }
                           else if (entityName == fGtSymbol) {
  -                            fStringBuffer.append('>');
  +                            fStringBuffer2.append('>');
                               fAttributeOffset++;
                               if (DEBUG_ATTR_ENTITIES) {
                                   System.out.println("*** increment attribute offset: "+fAttributeOffset);
                               }
                           }
                           else if (entityName == fQuotSymbol) {
  -                            fStringBuffer.append('"');
  +                            fStringBuffer2.append('"');
                               fAttributeOffset++;
                               if (DEBUG_ATTR_ENTITIES) {
                                   System.out.println("*** increment attribute offset: "+fAttributeOffset);
  @@ -1058,7 +1061,10 @@
                                                  XMLErrorReporter.SEVERITY_FATAL_ERROR);
                   }
                   else if (c == '%') {
  -                    fStringBuffer.append((char)fEntityScanner.scanChar());
  +                    fStringBuffer2.append((char)fEntityScanner.scanChar());
  +                }
  +                else if (c != -1 && XMLChar.isHighSurrogate(c)) {
  +                    scanSurrogates(fStringBuffer2);
                   }
                   else if (c != -1 && XMLChar.isInvalid(c)) {
                       fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN, 
  @@ -1072,13 +1078,13 @@
                       if (c != quote || entityDepth == fEntityDepth) {
                           break;
                       }
  -                    fStringBuffer.append(fString);
  -                    fStringBuffer.append((char)fEntityScanner.scanChar());
  +                    fStringBuffer2.append(fString);
  +                    fStringBuffer2.append((char)fEntityScanner.scanChar());
                   }
               } while (c != quote);
               fAttributeOffset += fString.length;
  -            fStringBuffer.append(fString);
  -            value = fStringBuffer;
  +            fStringBuffer2.append(fString);
  +            value = fStringBuffer2;
               int attrEntityCount = fAttributeEntityStack.size();
               if (DEBUG_ATTR_ENTITIES) {
                   System.out.println("*** add remaining attribute entities: "+attrEntityCount);
  @@ -1145,6 +1151,7 @@
   
       } // scanContent():int
   
  +
       /** 
        * Scans a CDATA section. 
        * <p>
  @@ -1267,14 +1274,14 @@
        */
       protected void scanCharReference() 
           throws IOException, SAXException {
  -
  -        int value = scanCharReferenceValue();
   
  -        // call handler
  -        if (fDocumentHandler != null) {
  -            fSingleChar[0] = (char)value;
  -            fString.setValues(fSingleChar, 0, 1);
  -            fDocumentHandler.characters(fString);
  +        fStringBuffer2.clear();
  +        int ch = scanCharReferenceValue(fStringBuffer2);
  +        if (ch != -1) {
  +            // call handler
  +            if (fDocumentHandler != null) {
  +                fDocumentHandler.characters(fStringBuffer2);
  +            }
           }
   
       } // scanCharReference()
  @@ -1940,6 +1947,17 @@
                                           fEntityScanner.scanChar();
                                           setScannerState(SCANNER_STATE_REFERENCE);
                                           break;
  +                                    }
  +                                    else if (c != -1 && XMLChar.isHighSurrogate(c)) {
  +                                        // special case: we have surrogates
  +                                        fStringBuffer.clear();
  +                                        if (scanSurrogates(fStringBuffer)) {
  +
  +                                            // call handler
  +                                            if (fDocumentHandler != null) {
  +                                                fDocumentHandler.characters(fStringBuffer);
  +                                            }
  +                                        }
                                       }
                                       else if (c != -1 && XMLChar.isInvalid(c)) {
                                           fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN, 
  
  
  
  1.1.2.22  +49 -3     xml-xerces/java/src/org/apache/xerces/impl/Attic/XMLScanner.java
  
  Index: XMLScanner.java
  ===================================================================
  RCS file: /home/cvs/xml-xerces/java/src/org/apache/xerces/impl/Attic/XMLScanner.java,v
  retrieving revision 1.1.2.21
  retrieving revision 1.1.2.22
  diff -u -r1.1.2.21 -r1.1.2.22
  --- XMLScanner.java	2000/10/31 00:35:47	1.1.2.21
  +++ XMLScanner.java	2000/10/31 01:38:18	1.1.2.22
  @@ -92,7 +92,7 @@
    * @author Andy Clark, IBM
    * @author Arnaud  Le Hors, IBM
    *
  - * @version $Id: XMLScanner.java,v 1.1.2.21 2000/10/31 00:35:47 andyc Exp $
  + * @version $Id: XMLScanner.java,v 1.1.2.22 2000/10/31 01:38:18 lehors Exp $
    */
   public abstract class XMLScanner 
       implements XMLComponent {
  @@ -555,7 +555,9 @@
   
   
       /**
  -     * Scans a character reference.
  +     * Scans a character reference and appends the corresponding chars to the
  +     * specified buffer.
  +     *
        * <p>
        * <pre>
        * [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'
  @@ -564,9 +566,11 @@
        * <strong>Note:</strong> This method uses fStringBuffer, anything in it
        * at the time of calling is lost.
        *
  +     * @param buf the character buffer to append chars to
  +     *
        * @return the character value
        */
  -    protected int scanCharReferenceValue() 
  +    protected int scanCharReferenceValue(XMLStringBuffer buf) 
           throws IOException, SAXException {
   
           // scan hexadecimal value
  @@ -626,7 +630,49 @@
                                              Integer.toString(value, 16) },
                                          XMLErrorReporter.SEVERITY_FATAL_ERROR);
           }
  +
  +        // append corresponding chars to the given buffer
  +        if (!XMLChar.isSupplemental(value)) {
  +            buf.append((char) value);
  +        }
  +        else {
  +            // character is supplemental, split it into surrogate chars
  +            buf.append(XMLChar.highSurrogate(value));
  +            buf.append(XMLChar.lowSurrogate(value));
  +        }
  +
           return value;
       }
  +
  +
  +    /**
  +     * Scans surrogates and appends them to the specified buffer.
  +     * <p>
  +     * <strong>Note:</strong> This assumes the current char has already been
  +     * identified as a high surrogate.
  +     *
  +     * @return True if it succeeded.
  +     */
  +    protected boolean scanSurrogates(XMLStringBuffer buf)
  +        throws IOException, SAXException {
  +
  +        int high = fEntityScanner.scanChar();
  +        int low = fEntityScanner.peekChar();
  +        if (!XMLChar.isLowSurrogate(low)) {
  +            fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN, 
  +                                       "InvalidCharInContent",
  +                                       new Object[] {Integer.toString(high, 16)},
  +                                       XMLErrorReporter.SEVERITY_FATAL_ERROR);
  +            return false;
  +        }
  +        fEntityScanner.scanChar();
  +
  +        // fill in the buffer
  +        buf.append((char)high);
  +        buf.append((char)low);
  +
  +        return true;
  +
  +    } // scanSurrogates():boolean
   
   } // class XMLScanner