You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@xerces.apache.org by an...@locus.apache.org on 2000/10/05 03:59:30 UTC

cvs commit: xml-xerces/java/src/org/apache/xerces/xni XMLDocumentHandler.java

andyc       00/10/04 18:59:30

  Modified:    java/src/org/apache/xerces/impl Tag: xerces_j_2
                        XMLDocumentScanner.java XMLEntityManager.java
                        XMLEntityScanner.java XMLValidator.java
               java/src/org/apache/xerces/parsers Tag: xerces_j_2
                        XMLDocumentParser.java
               java/src/org/apache/xerces/xni Tag: xerces_j_2
                        XMLDocumentHandler.java
  Added:       java/src/org/apache/xerces/util Tag: xerces_j_2 URI.java
  Log:
  1) Removed "actualEncoding" parameter from xmlDecl/textDecl
     callbacks in the XMLDocumentHandler. Updated all of the
     implementors of this interface and the document scanner
     to make the correct callback. This information is now
     passed during the startEntity callback of the
     XMLEntityHandler interface.
  2) Updated the document scanner to allow an EOFException to
     occur during the TrailingMiscDispatcher. I just remembered
     that well-formedness of comments and PIs appearing in the
     trailing miscellaneous section is NOT being checked on
     EOFException -- this change needs to go in later.
  3) Continued work on the XMLEntityManager/Scanner. Filled in
     some more methods and worked out a system for handling the
     auto-detection of input stream encodings while keeping
     that transparent to the entity scanner. Next I'll be
     re-writing the entity scanner instance to improve perf
     and provide a complete implementation of the entity
     scanner API.
  
  Revision  Changes    Path
  No                   revision
  
  
  No                   revision
  
  
  1.1.2.1   +1393 -0   xml-xerces/java/src/org/apache/xerces/util/Attic/URI.java
  
  
  
  
  No                   revision
  
  
  No                   revision
  
  
  1.1.2.8   +43 -37    xml-xerces/java/src/org/apache/xerces/impl/Attic/XMLDocumentScanner.java
  
  Index: XMLDocumentScanner.java
  ===================================================================
  RCS file: /home/cvs/xml-xerces/java/src/org/apache/xerces/impl/Attic/XMLDocumentScanner.java,v
  retrieving revision 1.1.2.7
  retrieving revision 1.1.2.8
  diff -u -r1.1.2.7 -r1.1.2.8
  --- XMLDocumentScanner.java	2000/10/04 01:19:00	1.1.2.7
  +++ XMLDocumentScanner.java	2000/10/05 01:59:28	1.1.2.8
  @@ -100,7 +100,7 @@
    * @author Stubs generated by DesignDoc on Mon Sep 11 11:10:57 PDT 2000
    * @author Andy Clark, IBM
    *
  - * @version $Id: XMLDocumentScanner.java,v 1.1.2.7 2000/10/04 01:19:00 ericye Exp $
  + * @version $Id: XMLDocumentScanner.java,v 1.1.2.8 2000/10/05 01:59:28 andyc Exp $
    */
   public class XMLDocumentScanner
       implements XMLComponent, XMLDocumentSource, XMLEntityHandler {
  @@ -547,7 +547,6 @@
           // pseudo-attribute values
           String version = null;
           String encoding = null;
  -        String actualEncoding = null;
           String standalone = null;
   
           // scan pseudo-attributes
  @@ -655,10 +654,10 @@
           // call handler
           if (fDocumentHandler != null) {
               if (scanningTextDecl) {
  -                fDocumentHandler.textDecl(version, encoding, actualEncoding);
  +                fDocumentHandler.textDecl(version, encoding);
               }
               else {
  -                fDocumentHandler.xmlDecl(version, encoding, actualEncoding, standalone);
  +                fDocumentHandler.xmlDecl(version, encoding, standalone);
               }
           }
   
  @@ -1968,45 +1967,52 @@
           public boolean dispatch(boolean complete) 
               throws IOException, SAXException {
   
  -            do {
  -
  -                if (fEntityScanner.skipChar('<')) {
  -                    setScannerState(SCANNER_STATE_START_OF_MARKUP);
  -                    if (fEntityScanner.skipChar('?')) {
  -                        scanPI();
  +            try {
  +                do {
  +    
  +                    if (fEntityScanner.skipChar('<')) {
  +                        setScannerState(SCANNER_STATE_START_OF_MARKUP);
  +                        if (fEntityScanner.skipChar('?')) {
  +                            scanPI();
  +                        }
  +                        else if ( fEntityScanner.skipChar('!')) {
  +                            scanComment();
  +                        }
  +                        setScannerState(SCANNER_STATE_TRAILING_MISC);
                       }
  -                    else if ( fEntityScanner.skipChar('!')) {
  -                        scanComment();
  +                    else if ( fEntityScanner.skipSpaces() ) {
  +                        // do nothing
                       }
  -                    setScannerState(SCANNER_STATE_TRAILING_MISC);
  -                }
  -                else if ( fEntityScanner.skipSpaces() ) {
  -                    // do nothing
  -                }
  -                else {
  -                    int ch = fEntityScanner.peekChar();
  -
  -                    if (XMLChar.isInvalid(ch)) {
  -                        if (ch == -1 ) {
  -                            setScannerState(SCANNER_STATE_END_OF_INPUT);
  -                            setDispatcher(fEndOfInputDispatcher);
  -                            return true;
  +                    else {
  +                        int ch = fEntityScanner.peekChar();
  +    
  +                        if (XMLChar.isInvalid(ch)) {
  +                            if (ch == -1 ) {
  +                                setScannerState(SCANNER_STATE_END_OF_INPUT);
  +                                setDispatcher(fEndOfInputDispatcher);
  +                                return true;
  +                            }
  +                            else {
  +                                // REVISIT report error
  +                                // throw new SAXException("invalid char in trailing Misc);
  +                                setScannerState(SCANNER_STATE_END_OF_INPUT);
  +                                setDispatcher(fEndOfInputDispatcher);
  +                                return false;
  +                            }
                           }
                           else {
  -                            // REVISIT report error
  -                            // throw new SAXException("invalid char in trailing Misc);
  -                            setScannerState(SCANNER_STATE_END_OF_INPUT);
  -                            setDispatcher(fEndOfInputDispatcher);
  -                            return false;
  +                            //REVISIT: report error
  +                            throw new SAXException("not recognized in trailing Misc");
                           }
  -                    }
  -                    else {
  -                        //REVISIT: report error
  -                        throw new SAXException("not recognized in trailing Misc");
                       }
  -                }
  -
  -            } while ( complete );
  +    
  +                } while ( complete );
  +            }
  +            catch (EOFException e) {
  +                // NOTE: This is the only place we're allowed to reach
  +                //       the real end of the document stream. So ignore
  +                //       the exception and move to end of input. -Ac
  +            }
   
               setScannerState(SCANNER_STATE_END_OF_INPUT);
               setDispatcher(fEndOfInputDispatcher);
  
  
  
  1.1.2.6   +401 -55   xml-xerces/java/src/org/apache/xerces/impl/Attic/XMLEntityManager.java
  
  Index: XMLEntityManager.java
  ===================================================================
  RCS file: /home/cvs/xml-xerces/java/src/org/apache/xerces/impl/Attic/XMLEntityManager.java,v
  retrieving revision 1.1.2.5
  retrieving revision 1.1.2.6
  diff -u -r1.1.2.5 -r1.1.2.6
  --- XMLEntityManager.java	2000/10/04 20:45:35	1.1.2.5
  +++ XMLEntityManager.java	2000/10/05 01:59:28	1.1.2.6
  @@ -59,16 +59,20 @@
   
   import java.io.EOFException;
   import java.io.FileInputStream;
  +import java.io.FilterReader;
   import java.io.InputStream;
   import java.io.InputStreamReader;
   import java.io.IOException;
  +import java.io.PushbackInputStream;
   import java.io.PushbackReader;
   import java.io.Reader;
   import java.io.StringReader;
   import java.util.Hashtable;
   import java.util.Stack;
   
  +import org.apache.xerces.util.EncodingMap;
   import org.apache.xerces.util.SymbolTable;
  +import org.apache.xerces.util.URI;
   import org.apache.xerces.util.XMLChar;
   
   import org.apache.xerces.xni.QName;
  @@ -101,7 +105,7 @@
    * @author Stubs generated by DesignDoc on Mon Sep 18 18:23:16 PDT 2000
    * @author Andy Clark, IBM
    *
  - * @version $Id: XMLEntityManager.java,v 1.1.2.5 2000/10/04 20:45:35 ericye Exp $
  + * @version $Id: XMLEntityManager.java,v 1.1.2.6 2000/10/05 01:59:28 andyc Exp $
    */
   public class XMLEntityManager
       implements XMLComponent {
  @@ -110,6 +114,9 @@
       // Constants
       //
   
  +    /** Default buffer size (1024). */
  +    public static final int DEFAULT_BUFFER_SIZE = 1024;
  +
       // xerces features
   
       /** Xerces feature prefix. */
  @@ -135,12 +142,26 @@
       // Data
       //
   
  +    // features
  +
  +    /** 
  +     * Buffer size. This feature does not have a feature identifier, yet. 
  +     * Should it?
  +     */
  +    protected int fBufferSize = DEFAULT_BUFFER_SIZE;
  +
       // properties
   
  -    /** Entity resolver. */
  +    /** 
  +     * Entity resolver. This property identifier is:
  +     * http://apache.org/xml/properties/internal/entity-resolver
  +     */
       protected EntityResolver fEntityResolver;
   
  -    /** Symbol table. */
  +    /** 
  +     * Symbol table. This property identifier is:
  +     * http://apache.org/xml/properties/internal/symbol-table
  +     */
       protected SymbolTable fSymbolTable;
   
       // handlers
  @@ -153,6 +174,8 @@
       /** Entity scanner. */
       protected XMLEntityScanner fEntityScanner;
   
  +    // entities
  +
       /** Entities. */
       protected Hashtable fEntities = new Hashtable();
   
  @@ -162,17 +185,6 @@
       /** Current entity. */
       protected ScannedEntity fCurrentEntity;
   
  -    // private
  -
  -    /** Reader. */
  -    private PushbackReader fReader;
  -
  -    /** Character buffer. */
  -    private char[] fBuffer = new char[1024];
  -
  -    /** Buffer length. */
  -    private int fLength;
  -
       //
       // Constructors
       //
  @@ -204,7 +216,6 @@
        * @param baseSystemId 
        */
       public void addExternalEntity(String name, String publicId, String systemId, String baseSystemId) {
  -        name = fSymbolTable.addSymbol(name);
           Entity entity = new ExternalEntity(name, publicId, systemId, baseSystemId);
           fEntities.put(name, entity);
       } // addExternalEntity(String,String,String,String)
  @@ -216,7 +227,6 @@
        * @param text 
        */
       public void addInternalEntity(String name, String text) {
  -        name = fSymbolTable.addSymbol(name);
           Entity entity = new InternalEntity(name, text);
           fEntities.put(name, entity);
       } // addInternalEntity(String,String)
  @@ -232,7 +242,29 @@
        */
       public InputSource resolveEntity(String publicId, String systemId, String baseSystemId)
           throws IOException, SAXException {
  -        throw new RuntimeException("XMLEntityManager#resolveEntity(String,String,String) not implemented");
  +
  +        // give the entity resolver a chance
  +        InputSource inputSource = null;
  +        if (fEntityResolver != null) {
  +            inputSource = fEntityResolver.resolveEntity(publicId, systemId);
  +        }
  +
  +        // do default resolution
  +        if (inputSource == null) {
  +            // if no base systemId given, assume that it's relative
  +            // to the systemId of the current scanned entity
  +            if (baseSystemId == null) {
  +                baseSystemId = fCurrentEntity.systemId;
  +            }
  +
  +            // expand the system id
  +            systemId = expandSystemId(systemId, baseSystemId);
  +            inputSource = new InputSource(systemId);
  +            inputSource.setPublicId(publicId);
  +        }
  +
  +        return inputSource;
  +
       } // resolveEntity
   
       /**
  @@ -325,9 +357,6 @@
           addInternalEntity("apos", "'");
           addInternalEntity("quot", "\"");
   
  -        // initialize scanner info
  -        fReader = null;
  -
       } // reset(XMLComponentManager)
   
       /**
  @@ -380,24 +409,51 @@
           String encoding = inputSource.getEncoding();
   
           // create reader
  +        InputStream stream = null;
           Reader reader = inputSource.getCharacterStream();
           if (reader == null) {
  -            InputStream stream = inputSource.getByteStream();
  +            stream = inputSource.getByteStream();
               if (stream == null) {
                   // REVISIT: open system identifier
                   stream = new FileInputStream(systemId);
  +            }
  +                
  +            // perform auto-detect of encoding
  +            if (encoding == null) {
  +                // read first four bytes and determine encoding
  +                final byte[] b4 = new byte[4];
  +                int count = stream.read(b4, 0, 4);
  +                encoding = getJavaEncodingName(b4, count);
  +
  +                // push back the characters we read
  +                PushbackInputStream pbstream = new PushbackInputStream(stream, 4);
  +                pbstream.unread(b4, 0, count);
  +                stream = pbstream;
               }
  -            reader = new InputStreamReader(stream);
  +
  +            // create reader from input stream
  +            // REVISIT: We can use customized readers here. -Ac
  +            reader = new InputStreamReader(stream, encoding);
  +
  +            // REVISIT: Activate this reader once I've updated the
  +            //          entity scanner. -Ac
  +            //reader = new OneCharReader(reader);
           }
  -        fReader = new PushbackReader(reader, 32);
  +
  +        // REVISIT: This goes away once I've updated the entity
  +        //          scanner to buffer from a standard java.io.Reader
  +        //          object. -Ac
  +        reader = new PushbackReader(reader, 32);
   
           // push entity on stack
  -        fCurrentEntity = new ScannedEntity(name, publicId, systemId, reader);
  +        fCurrentEntity = new ScannedEntity(name, publicId, systemId, 
  +                                           stream, reader, encoding);
           fEntityStack.push(fCurrentEntity);
   
           // call handler
           if (fEntityHandler != null) {
  -            fEntityHandler.startEntity(name, publicId, systemId, encoding);
  +            String ianaEncoding = EncodingMap.getJava2IANAMapping(encoding);
  +            fEntityHandler.startEntity(name, publicId, systemId, ianaEncoding);
           }
   
       } // startEntity(String,InputSource)
  @@ -417,7 +473,198 @@
   
       } // endEntity(String)
   
  +    /**
  +     * Expands a system id relative to the current user directory
  +     * and returns the result as a URI string. If the identifier
  +     * is null, empty, already parses as a URI, or cannot be
  +     * expanded, it is returned unchanged.
  +     *
  +     * @param systemId The systemId to be expanded.
  +     *
  +     * @return Returns the URI string representing the expanded system
  +     *         identifier, or the given system identifier itself when
  +     *         no expansion was needed or possible.
  +     *
  +     */
  +    protected String expandSystemId(String systemId) {
  +        return expandSystemId(systemId, null);
  +    } // expandSystemId(String):String
  +
  +    /**
  +     * Expands a system id against a base system id and returns the
  +     * result as a URI string. If the identifier is null, empty,
  +     * already parses as a URI, or cannot be expanded, it is
  +     * returned unchanged.
  +     *
  +     * @param systemId     The systemId to be expanded.
  +     * @param baseSystemId The base to expand against; if null, the
  +     *                     "user.dir" system property is used.
  +     *
  +     * @return Returns the expanded URI string, or the given system
  +     *         identifier itself when it was not expanded.
  +     */
  +    protected String expandSystemId(String systemId, 
  +                                    String baseSystemId) {
  +
  +        // check for bad parameters id
  +        if (systemId == null || systemId.length() == 0) {
  +            return systemId;
  +        }
  +
  +        // if id already expanded, return
  +        try {
  +            URI uri = new URI(systemId);
  +            if (uri != null) {
  +                return systemId;
  +            }
  +        }
  +        catch (URI.MalformedURIException e) {
  +            // continue on...
  +        }
  +
  +        // normalize id
  +        String id = fixURI(systemId);
  +
  +        // normalize base
  +        URI base = null;
  +        URI uri = null;
  +        try {
  +            if (baseSystemId == null) {
  +                String dir;
  +                try {
  +                    dir = fixURI(System.getProperty("user.dir"));
  +                }
  +                catch (SecurityException se) {
  +                    dir = "";
  +                }
  +                if (!dir.endsWith("/")) {
  +                    dir = dir + "/";
  +                }
  +                base = new URI("file", "", dir, null, null);
  +            }
  +            else {
  +                base = new URI(baseSystemId);
  +            }
  +
  +            // expand id
  +            uri = new URI(base, id);
  +        }
  +        catch (Exception e) {
  +            // let it go through
  +        }
  +        if (uri == null) {
  +            return systemId;
  +        }
  +        return uri.toString();
  +
  +    } // expandSystemId(String,String):String
  +
       //
  +    // Private methods
  +    //
  +
  +    /**
  +     * Returns the Java encoding name that is auto-detected from
  +     * the bytes specified.
  +     *
  +     * @param b4    The first four bytes of the input.
  +     * @param count The number of bytes actually read.
  +     *
  +     * @return The Java encoding name.
  +     */
  +    private static String getJavaEncodingName(byte[] b4, int count) {
  +
  +        if (count < 2) {
  +            return "UTF8";
  +        }
  +
  +        // UTF-16, with BOM
  +        byte b0 = b4[0];
  +        byte b1 = b4[1];
  +        if (b0 == 0xFE && b1 == 0xFF) {
  +            // UTF-16, big-endian
  +            return "UnicodeBig";
  +        }
  +        if (b0 == 0xFF && b1 == 0xFE) {
  +            // UTF-16, little-endian
  +            return "UnicodeLittle";
  +        }
  +
  +        if (count < 4) {
  +            return "UTF8";
  +        }
  +
  +        // other encodings
  +        byte b2 = b4[2];
  +        byte b3 = b4[3];
  +        if (b0 == 0x00 && b1 == 0x00 && b2 == 0x00 && b3 == 0x3C) {
  +            // UCS-4, big endian (1234)
  +            // REVISIT: What should this be?
  +            return "Unicode";
  +        }
  +        if (b0 == 0x3C && b1 == 0x00 && b2 == 0x00 && b3 == 0x00) {
  +            // UCS-4, little endian (4321)
  +            // REVISIT: What should this be?
  +            return "Unicode";
  +        }
  +        if (b0 == 0x00 && b1 == 0x00 && b2 == 0x3C && b3 == 0x00) {
  +            // UCS-4, unusual octet order (2143)
  +            // REVISIT: What should this be?
  +            return "Unicode";
  +        }
  +        if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x00) {
  +            // UCS-4, unusual octet order (3412)
  +            // REVISIT: What should this be?
  +            return "Unicode";
  +        }
  +        if (b1 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x3F) {
  +            // UTF-16, big-endian, no BOM
  +            // REVISIT: What should this be?
  +            return "Unicode";
  +        }
  +        if (b1 == 0x3C && b1 == 0x00 && b2 == 0x3F && b3 == 0x00) {
  +            // UTF-16, little-endian, no BOM
  +            return "UnicodeLittle";
  +        }
  +        if (b1 == 0x4C && b1 == 0x6F && b2 == 0xA7 && b3 == 0x94) {
  +            // EBCDIC
  +            return "DBCS_EBCDIC";
  +        }
  +
  +        // default encoding
  +        return "UTF8";
  +
  +    } // getJavaEncodingName(byte[],int):String
  +
  +    /**
  +     * Fixes a platform dependent filename to standard URI form.
  +     *
  +     * @param str The string to fix.
  +     *
  +     * @return Returns the fixed URI string.
  +     */
  +    private static String fixURI(String str) {
  +
  +        // handle platform dependent strings
  +        str = str.replace(java.io.File.separatorChar, '/');
  +
  +        // Windows fix
  +        if (str.length() >= 2) {
  +            char ch1 = str.charAt(1);
  +            if (ch1 == ':') {
  +                char ch0 = Character.toUpperCase(str.charAt(0));
  +                if (ch0 >= 'A' && ch0 <= 'Z') {
  +                    str = "/" + str;
  +                }
  +            }
  +        }
  +
  +        // done
  +        return str;
  +
  +    } // fixURI(String):String
  +
  +    //
       // Classes
       //
   
  @@ -539,7 +786,7 @@
        *
        * @author Andy Clark, IBM
        */
  -    protected static class ScannedEntity 
  +    protected class ScannedEntity 
           extends Entity {
   
           //
  @@ -548,6 +795,9 @@
   
           // i/o
   
  +        /** Input stream. */
  +        public InputStream stream;
  +
           /** Reader. */
           public Reader reader;
   
  @@ -565,10 +815,15 @@
           /** Column number. */
           public int columnNumber;
   
  +        // encoding
  +
  +        /** Auto-detected encoding. */
  +        public String encoding;
  +
           // buffer
   
           /** Character buffer. */
  -        public char[] ch;
  +        public char[] ch = new char[fBufferSize];
   
           /** Offset. */
           public int offset;
  @@ -582,11 +837,14 @@
   
           /** Constructs a scanned entity. */
           public ScannedEntity(String name, String publicId, String systemId,
  -                             Reader reader) {
  +                             InputStream stream, Reader reader, 
  +                             String encoding) {
               super(name);
               this.publicId = publicId;
               this.systemId = systemId;
  +            this.stream = stream;
               this.reader = reader;
  +            this.encoding = encoding;
           } // <init>(Reader,String,String,String)
   
           //
  @@ -620,6 +878,13 @@
           // XMLEntityScanner methods
           //
       
  +        public void setEncoding(String encoding) throws IOException {
  +            if (fCurrentEntity.stream != null) {
  +                OneCharReader ocreader = (OneCharReader)fCurrentEntity.reader;
  +                fCurrentEntity.reader = ocreader.getReader();
  +            }
  +        } // setEncoding(String)
  +
           /**
            * peekChar
            * 
  @@ -652,16 +917,16 @@
           public String scanNmtoken() throws IOException, SAXException {
               if (DEBUG) System.out.println("#scanNmtoken()");
       
  -            fLength = 0;
  +            fCurrentEntity.length = 0;
               boolean nmtoken = false;
               while (XMLChar.isName(peek())) {
                   nmtoken = true;
  -                fBuffer[fLength++] = (char)read();
  +                fCurrentEntity.ch[fCurrentEntity.length++] = (char)read();
               }
       
               String symbol = null;
               if (nmtoken) {
  -                symbol = fSymbolTable.addSymbol(fBuffer, 0, fLength);
  +                symbol = fSymbolTable.addSymbol(fCurrentEntity.ch, 0, fCurrentEntity.length);
               }
               return symbol;
       
  @@ -675,19 +940,19 @@
           public String scanName() throws IOException, SAXException {
               if (DEBUG) System.out.println("#scanName()");
       
  -            fLength = 0;
  +            fCurrentEntity.length = 0;
               boolean name = false;
               if (XMLChar.isNameStart(peek())) {
                   name = true;
  -                fBuffer[fLength++] = (char)read();
  +                fCurrentEntity.ch[fCurrentEntity.length++] = (char)read();
                   while (XMLChar.isName(peek())) {
  -                    fBuffer[fLength++] = (char)read();
  +                    fCurrentEntity.ch[fCurrentEntity.length++] = (char)read();
                   }
               }
       
               String symbol = null;
               if (name) {
  -                symbol = fSymbolTable.addSymbol(fBuffer, 0, fLength);
  +                symbol = fSymbolTable.addSymbol(fCurrentEntity.ch, 0, fCurrentEntity.length);
               }
               return symbol;
       
  @@ -705,25 +970,25 @@
               String localpart = null;
               String rawname = null;
       
  -            fLength = 0;
  +            fCurrentEntity.length = 0;
               int colons = -1;
               int index = 0;
               if (XMLChar.isNameStart(peek())) {
                   colons = 0;
  -                fBuffer[fLength++] = (char)read();
  +                fCurrentEntity.ch[fCurrentEntity.length++] = (char)read();
                   int c = -1;
                   while (XMLChar.isName(c = peek())) {
                       if (c == ':') {
                           colons++;
                           if (colons == 1) {
  -                            index = fLength + 1;
  -                            prefix = fSymbolTable.addSymbol(fBuffer, 0, fLength);
  +                            index = fCurrentEntity.length + 1;
  +                            prefix = fSymbolTable.addSymbol(fCurrentEntity.ch, 0, fCurrentEntity.length);
                           }
                       }
  -                    fBuffer[fLength++] = (char)read();
  +                    fCurrentEntity.ch[fCurrentEntity.length++] = (char)read();
                   }
  -                localpart = fSymbolTable.addSymbol(fBuffer, index, fLength - index);
  -                rawname = fSymbolTable.addSymbol(fBuffer, 0, fLength);
  +                localpart = fSymbolTable.addSymbol(fCurrentEntity.ch, index, fCurrentEntity.length - index);
  +                rawname = fSymbolTable.addSymbol(fCurrentEntity.ch, 0, fCurrentEntity.length);
               }
       
               if (colons >= 0 && colons < 2) {
  @@ -743,14 +1008,14 @@
           public int scanContent(XMLString content) 
               throws IOException, SAXException {
       
  -            fLength = 0;
  +            fCurrentEntity.length = 0;
               while (peek() != '<' && peek() != '&') {
  -                fBuffer[fLength++] = (char)read();
  -                if (fLength == fBuffer.length) {
  +                fCurrentEntity.ch[fCurrentEntity.length++] = (char)read();
  +                if (fCurrentEntity.length == fCurrentEntity.ch.length) {
                       break;
                   }
               }
  -            content.setValues(fBuffer, 0, fLength);
  +            content.setValues(fCurrentEntity.ch, 0, fCurrentEntity.length);
       
               return peek();
       
  @@ -765,14 +1030,14 @@
           public int scanAttContent(int quote, XMLString content)
               throws IOException, SAXException {
       
  -            fLength = 0;
  +            fCurrentEntity.length = 0;
               while (peek() != quote) {
  -                fBuffer[fLength++] = (char)read();
  -                if (fLength == fBuffer.length) {
  +                fCurrentEntity.ch[fCurrentEntity.length++] = (char)read();
  +                if (fCurrentEntity.length == fCurrentEntity.ch.length) {
                       break;
                   }
               }
  -            content.setValues(fBuffer, 0, fLength);
  +            content.setValues(fCurrentEntity.ch, 0, fCurrentEntity.length);
       
               return peek();
       
  @@ -891,18 +1156,18 @@
       
           /** Peeks the next character. */
           private final int peek() throws IOException {
  -            int c = fReader.read();
  +            int c = fCurrentEntity.reader.read();
               if (c == -1) {
                   throw new EOFException();
               }
               if (DEBUG) System.out.println("?"+(char)c);
  -            fReader.unread(c);
  +            ((PushbackReader)fCurrentEntity.reader).unread(c);
               return c;
           }
       
           /** Reads the next character. */
           private int read() throws IOException {
  -            int c = fReader.read();
  +            int c = fCurrentEntity.reader.read();
               if (c == -1) {
                   throw new EOFException();
               }
  @@ -912,14 +1177,95 @@
       
           private void unread(int c) throws IOException {
               if (DEBUG) System.out.println("-"+(char)c);
  -            fReader.unread(c);
  +            ((PushbackReader)fCurrentEntity.reader).unread(c);
           }
       
           private void unread(char[] ch, int offset, int length) throws IOException {
               if (DEBUG) System.out.println("-"+new String(ch, offset, length));
  -            fReader.unread(ch, offset, length);
  +            ((PushbackReader)fCurrentEntity.reader).unread(ch, offset, length);
           }
       
       } // class EntityScanner
  +
  +    /**
  +     * A reader that reads only one character at a time. This is
  +     * needed for those times when we've auto-detected the encoding
  +     * from an input stream and need to swap out the reader once
  +     * the xmlDecl/textDecl has been read and processed. If we
  +     * read too far, then we could erroneously convert bytes from
  +     * the input stream to the wrong character code point.
  +     * <p>
  +     * Once a '>' character has been returned, the next call to
  +     * <code>read()</code> replaces the current entity's reader with
  +     * the wrapped reader, so that later reads bypass this class.
  +     *
  +     * @author Andy Clark, IBM
  +     */
  +    protected class OneCharReader
  +        extends FilterReader {
  +
  +        //
  +        // Data
  +        //
  +
  +        /** True if we've seen the end of the first markup. */
  +        private boolean seenEndOfMarkup;
  +
  +        //
  +        // Constructors
  +        //
  +
  +        /**
  +         * Constructs this reader from another reader.
  +         *
  +         * @param reader The reader to wrap.
  +         */
  +        public OneCharReader(Reader reader) {
  +            super(reader);
  +        }
  +
  +        //
  +        // Public methods
  +        //
  +
  +        /** Returns the reader that is being wrapped. */
  +        public Reader getReader() {
  +            return in;
  +        } // getReader():Reader
  +
  +        //
  +        // Reader methods
  +        //
  +
  +        /**
  +         * Returns a single character.
  +         *
  +         * @return The character read, or -1 if the end of the
  +         *         stream has been reached.
  +         *
  +         * @throws IOException Thrown by the wrapped reader.
  +         */
  +        public int read() throws IOException {
  +
  +            // swap out this inefficient reader because we've
  +            // already passed the first piece of markup and
  +            // the encoding was not set
  +            if (seenEndOfMarkup) {
  +                fCurrentEntity.reader = getReader();
  +                return fCurrentEntity.reader.read();
  +            }
  +
  +            // read character and look for end of markup
  +            int c = in.read();
  +            seenEndOfMarkup = c == '>';
  +            return c;
  +
  +        } // read():int
  +
  +        /** 
  +         * Reads as many characters as possible which, in this case,
  +         * is only a single character.
  +         *
  +         * @param ch     The destination buffer.
  +         * @param offset The index in <code>ch</code> at which the
  +         *               character is stored.
  +         * @param length The maximum number of characters requested.
  +         *
  +         * @return 1 if a character was read, 0 if
  +         *         <code>length</code> is zero, or -1 if the end of
  +         *         the stream has been reached.
  +         *
  +         * @throws IOException Thrown by the wrapped reader.
  +         */
  +        public int read(char[] ch, int offset, int length)
  +            throws IOException {
  +
  +            // nothing was requested, so don't consume a character
  +            // that we would have no room to store
  +            if (length == 0) {
  +                return 0;
  +            }
  +
  +            // handle end of file; the Reader contract requires -1
  +            // here (returning 0 makes callers that loop until -1
  +            // spin forever)
  +            int c = read();
  +            if (c == -1) {
  +                return -1;
  +            }
  +
  +            // return the 1 character
  +            ch[offset] = (char)c;
  +            return 1;
  +
  +        } // read(char[],int,int):int
  +
  +    } // class OneCharReader
   
   } // class XMLEntityManager
  
  
  
  1.1.2.4   +2 -1      xml-xerces/java/src/org/apache/xerces/impl/Attic/XMLEntityScanner.java
  
  Index: XMLEntityScanner.java
  ===================================================================
  RCS file: /home/cvs/xml-xerces/java/src/org/apache/xerces/impl/Attic/XMLEntityScanner.java,v
  retrieving revision 1.1.2.3
  retrieving revision 1.1.2.4
  diff -u -r1.1.2.3 -r1.1.2.4
  --- XMLEntityScanner.java	2000/10/02 23:19:10	1.1.2.3
  +++ XMLEntityScanner.java	2000/10/05 01:59:28	1.1.2.4
  @@ -78,7 +78,7 @@
    * @author Stubs generated by DesignDoc on Mon Sep 18 18:23:16 PDT 2000
    * @author Andy Clark, IBM
    *
  - * @version $Id: XMLEntityScanner.java,v 1.1.2.3 2000/10/02 23:19:10 andyc Exp $
  + * @version $Id: XMLEntityScanner.java,v 1.1.2.4 2000/10/05 01:59:28 andyc Exp $
    *
    * @see XMLEntityManager
    */
  @@ -89,6 +89,7 @@
       // Public methods
       //
   
  +    public abstract void setEncoding(String encoding) throws IOException;
       public abstract int peekChar() throws IOException, SAXException;
       public abstract int scanChar() throws IOException, SAXException;
       public abstract String scanNmtoken() throws IOException, SAXException;
  
  
  
  1.1.2.5   +3 -6      xml-xerces/java/src/org/apache/xerces/impl/Attic/XMLValidator.java
  
  Index: XMLValidator.java
  ===================================================================
  RCS file: /home/cvs/xml-xerces/java/src/org/apache/xerces/impl/Attic/XMLValidator.java,v
  retrieving revision 1.1.2.4
  retrieving revision 1.1.2.5
  diff -u -r1.1.2.4 -r1.1.2.5
  --- XMLValidator.java	2000/10/02 23:19:11	1.1.2.4
  +++ XMLValidator.java	2000/10/05 01:59:29	1.1.2.5
  @@ -74,7 +74,7 @@
   
   /**
    * @author Stubs generated by DesignDoc on Mon Sep 11 11:10:57 PDT 2000
  - * @version $Id: XMLValidator.java,v 1.1.2.4 2000/10/02 23:19:11 andyc Exp $
  + * @version $Id: XMLValidator.java,v 1.1.2.5 2000/10/05 01:59:29 andyc Exp $
    */
   public class XMLValidator
       implements XMLComponent, XMLDocumentFilter {
  @@ -200,10 +200,9 @@
        * 
        * @param version 
        * @param encoding 
  -     * @param actualEncoding 
        * @param standalone 
        */
  -    public void xmlDecl(String version, String encoding, String actualEncoding, String standalone)
  +    public void xmlDecl(String version, String encoding, String standalone)
           throws SAXException {
       } // xmlDecl
   
  @@ -313,10 +312,8 @@
        * 
        * @param version 
        * @param encoding 
  -     * @param actualEncoding 
        */
  -    public void textDecl(String version, String encoding, String actualEncoding)
  -        throws SAXException {
  +    public void textDecl(String version, String encoding) throws SAXException {
       } // textDecl
   
       /**
  
  
  
  No                   revision
  
  
  No                   revision
  
  
  1.1.2.13  +3 -6      xml-xerces/java/src/org/apache/xerces/parsers/Attic/XMLDocumentParser.java
  
  Index: XMLDocumentParser.java
  ===================================================================
  RCS file: /home/cvs/xml-xerces/java/src/org/apache/xerces/parsers/Attic/XMLDocumentParser.java,v
  retrieving revision 1.1.2.12
  retrieving revision 1.1.2.13
  diff -u -r1.1.2.12 -r1.1.2.13
  --- XMLDocumentParser.java	2000/10/04 20:45:37	1.1.2.12
  +++ XMLDocumentParser.java	2000/10/05 01:59:29	1.1.2.13
  @@ -77,7 +77,7 @@
   /**
    * @author Stubs generated by DesignDoc on Mon Sep 11 11:10:57 PDT 2000
    * @author Arnaud  Le Hors, IBM
  - * @version $Id: XMLDocumentParser.java,v 1.1.2.12 2000/10/04 20:45:37 ericye Exp $
  + * @version $Id: XMLDocumentParser.java,v 1.1.2.13 2000/10/05 01:59:29 andyc Exp $
    */
   public abstract class XMLDocumentParser
       extends XMLParser
  @@ -204,10 +204,9 @@
        * 
        * @param version 
        * @param encoding 
  -     * @param actualEncoding 
        * @param standalone 
        */
  -    public void xmlDecl(String version, String encoding, String actualEncoding, String standalone)
  +    public void xmlDecl(String version, String encoding, String standalone)
           throws SAXException {
       } // xmlDecl
   
  @@ -303,10 +302,8 @@
        * 
        * @param version 
        * @param encoding 
  -     * @param actualEncoding 
        */
  -    public void textDecl(String version, String encoding, String actualEncoding)
  -        throws SAXException {
  +    public void textDecl(String version, String encoding) throws SAXException {
       } // textDecl
   
       /**
  
  
  
  No                   revision
  
  
  No                   revision
  
  
  1.1.2.3   +11 -22    xml-xerces/java/src/org/apache/xerces/xni/Attic/XMLDocumentHandler.java
  
  Index: XMLDocumentHandler.java
  ===================================================================
  RCS file: /home/cvs/xml-xerces/java/src/org/apache/xerces/xni/Attic/XMLDocumentHandler.java,v
  retrieving revision 1.1.2.2
  retrieving revision 1.1.2.3
  diff -u -r1.1.2.2 -r1.1.2.3
  --- XMLDocumentHandler.java	2000/10/02 00:40:16	1.1.2.2
  +++ XMLDocumentHandler.java	2000/10/05 01:59:30	1.1.2.3
  @@ -65,7 +65,7 @@
   
   /**
    * @author Stubs generated by DesignDoc on Mon Sep 18 18:23:16 PDT 2000
  - * @version $Id: XMLDocumentHandler.java,v 1.1.2.2 2000/10/02 00:40:16 lehors Exp $
  + * @version $Id: XMLDocumentHandler.java,v 1.1.2.3 2000/10/05 01:59:30 andyc Exp $
    */
   public interface XMLDocumentHandler
       extends XMLEntityHandler {
  @@ -77,18 +77,16 @@
       /**
        * startDocument
        */
  -    public void startDocument()
  -        throws SAXException;
  +    public void startDocument() throws SAXException;
   
       /**
        * xmlDecl
        * 
        * @param version 
        * @param encoding 
  -     * @param actualEncoding 
        * @param standalone 
        */
  -    public void xmlDecl(String version, String encoding, String actualEncoding, String standalone)
  +    public void xmlDecl(String version, String encoding, String standalone)
           throws SAXException;
   
       /**
  @@ -106,8 +104,7 @@
        * 
        * @param text 
        */
  -    public void comment(XMLString text)
  -        throws SAXException;
  +    public void comment(XMLString text) throws SAXException;
   
       /**
        * processingInstruction
  @@ -150,51 +147,43 @@
        * 
        * @param text 
        */
  -    public void ignorableWhitespace(XMLString text)
  -        throws SAXException;
  +    public void ignorableWhitespace(XMLString text) throws SAXException;
   
       /**
        * endElement
        * 
        * @param element 
        */
  -    public void endElement(QName element)
  -        throws SAXException;
  +    public void endElement(QName element) throws SAXException;
   
       /**
        * endPrefixMapping
        * 
        * @param prefix 
        */
  -    public void endPrefixMapping(String prefix)
  -        throws SAXException;
  +    public void endPrefixMapping(String prefix) throws SAXException;
   
       /**
        * textDecl
        * 
        * @param version 
        * @param encoding 
  -     * @param actualEncoding 
        */
  -    public void textDecl(String version, String encoding, String actualEncoding)
  -        throws SAXException;
  +    public void textDecl(String version, String encoding) throws SAXException;
   
       /**
        * startCDATA
        */
  -    public void startCDATA()
  -        throws SAXException;
  +    public void startCDATA() throws SAXException;
   
       /**
        * endCDATA
        */
  -    public void endCDATA()
  -        throws SAXException;
  +    public void endCDATA() throws SAXException;
   
       /**
        * endDocument
        */
  -    public void endDocument()
  -        throws SAXException;
  +    public void endDocument() throws SAXException;
   
   } // interface XMLDocumentHandler