You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@tomcat.apache.org by co...@apache.org on 2003/01/09 18:43:15 UTC
cvs commit: jakarta-tomcat-jasper/jasper2/src/share/org/apache/jasper/xmlparser XercesEncodingDetector.java XMLEncodingDetector.java

costin      2003/01/09 09:43:15

  Modified:    jasper2/src/share/org/apache/jasper/xmlparser
                        XMLEncodingDetector.java
  Added:       jasper2/src/share/org/apache/jasper/xmlparser
                        XercesEncodingDetector.java
  Log:
  Remove the dependency of jasper on xerces.
  
  If xerces is not detected, the "default" behaviour is to return UTF8 as
  the encoding. This is probably not the best answer - the code should at
  least parse the file, or do a simpler version of what xerces is doing,
  but that is far too complex.
  
  Now at least it works without xerces.
  
  The detection should probably be a commons package.
  
  Revision  Changes    Path
  1.3       +16 -1570  jakarta-tomcat-jasper/jasper2/src/share/org/apache/jasper/xmlparser/XMLEncodingDetector.java
  
  Index: XMLEncodingDetector.java
  ===================================================================
  RCS file: /home/cvs/jakarta-tomcat-jasper/jasper2/src/share/org/apache/jasper/xmlparser/XMLEncodingDetector.java,v
  retrieving revision 1.2
  retrieving revision 1.3
  diff -u -r1.2 -r1.3
  --- XMLEncodingDetector.java	7 Nov 2002 15:23:43 -0000	1.2
  +++ XMLEncodingDetector.java	9 Jan 2003 17:43:15 -0000	1.3
  @@ -57,64 +57,16 @@
   
   package org.apache.jasper.xmlparser;
   
  -import java.io.EOFException;
  -import java.io.InputStream;
  -import java.io.InputStreamReader;
   import java.io.IOException;
  -import java.io.Reader;
  -import java.util.Locale;
   import java.util.jar.JarFile;
   
   import org.apache.jasper.JasperException;
   import org.apache.jasper.JspCompilationContext;
   import org.apache.jasper.compiler.ErrorDispatcher;
  -import org.apache.jasper.compiler.JspUtil;
   
  -import org.apache.xerces.util.EncodingMap;
  -import org.apache.xerces.util.SymbolTable;
  -import org.apache.xerces.util.XMLChar;
  -import org.apache.xerces.util.XMLStringBuffer;
  -import org.apache.xerces.xni.XMLString;
   
   public class XMLEncodingDetector {
  -    
  -    private InputStream stream;
  -    private String encoding;
  -    private boolean isEncodingSetInProlog;
  -    private Boolean isBigEndian;
  -    private Reader reader;
  -    
  -    // org.apache.xerces.impl.XMLEntityManager fields
  -    public static final int DEFAULT_BUFFER_SIZE = 2048;
  -    public static final int DEFAULT_XMLDECL_BUFFER_SIZE = 64;
  -    private boolean fAllowJavaEncodings;
  -    private SymbolTable fSymbolTable;
  -    private XMLEncodingDetector fCurrentEntity;
  -    private int fBufferSize = DEFAULT_BUFFER_SIZE;
  -    
  -    // org.apache.xerces.impl.XMLEntityManager.ScannedEntity fields
  -    private int lineNumber = 1;
  -    private int columnNumber = 1;
  -    private boolean literal;
  -    private char[] ch = new char[DEFAULT_BUFFER_SIZE];
  -    private int position;
  -    private int count;
  -    private boolean mayReadChunks = false;
  -    
  -    // org.apache.xerces.impl.XMLScanner fields
  -    private XMLString fString = new XMLString();    
  -    private XMLStringBuffer fStringBuffer = new XMLStringBuffer();
  -    private XMLStringBuffer fStringBuffer2 = new XMLStringBuffer();
  -    private final static String fVersionSymbol = "version";
  -    private final static String fEncodingSymbol = "encoding";
  -    private final static String fStandaloneSymbol = "standalone";
  -    
  -    // org.apache.xerces.impl.XMLDocumentFragmentScannerImpl fields
  -    private int fMarkupDepth = 0;
  -    private String[] fStrings = new String[3];
   
  -    private ErrorDispatcher err;
  -    
       /**
        * Autodetects the encoding of the XML document supplied by the given
        * input stream.
  @@ -131,1536 +83,30 @@
        * encoding was specified by the encoding attribute of an XML declaration
        * (prolog).
        */
  -    public static Object[] getEncoding(InputStream in, ErrorDispatcher err)
  -	throws IOException, JasperException
  +    public static Object[] getEncoding(String fname, JarFile jarFile,
  +                                       JspCompilationContext ctxt,
  +                                       ErrorDispatcher err)
  +            throws IOException, JasperException
       {
  -	XMLEncodingDetector detector = new XMLEncodingDetector(in, err);
  -	detector.createInitialReader();
  -	detector.scanXMLDecl();
  -	
  -	return new Object[] { detector.encoding,
  -			      new Boolean(detector.isEncodingSetInProlog) };
  +        XMLEncodingDetector detector=null;
  +        try {
  +            Class.forName( "org.apache.xerces.util.SymbolTable");
  +            Class detectorClass=Class.forName("org.apache.jasper.xmlparser.XercesEncodingDetector");
  +            detector=(XMLEncodingDetector)detectorClass.newInstance();
  +        } catch(Exception ex ) {
  +            detector=new XMLEncodingDetector();
  +        }
  +        return detector.getEncodingMethod(fname, jarFile, ctxt, err);
       }
   
  -    public static Object[] getEncoding(String fname, JarFile jarFile,
  +    public Object[] getEncodingMethod(String fname, JarFile jarFile,
   				       JspCompilationContext ctxt,
   				       ErrorDispatcher err)
   	throws IOException, JasperException
       {
  -	InputStream inStream = JspUtil.getInputStream(fname, jarFile,
  -						      ctxt, err);
  -	Object[] ret = getEncoding(inStream, err);
  -	inStream.close();
  -
  -	return ret;
  -    }
  -	
  -    /**
  -     * Constructor.
  -     */
  -    public XMLEncodingDetector(InputStream stream, ErrorDispatcher err) {
  -        this.stream = stream;
  -	this.err = err;
  -        fSymbolTable = new SymbolTable();
  -        fCurrentEntity = this;
  -    }
  -    
  -    // stub method
  -    void endEntity() {
  -    }
  -    
  -    // Adapted from:
  -    // org.apache.xerces.impl.XMLEntityManager.startEntity()
  -    private void createInitialReader() throws IOException, JasperException {
  -
  -	// wrap this stream in RewindableInputStream
  -	stream = new RewindableInputStream(stream);
  -
  -	// perform auto-detect of encoding if necessary
  -	if (encoding == null) {
  -	    // read first four bytes and determine encoding
  -	    final byte[] b4 = new byte[4];
  -	    int count = 0;
  -	    for (; count<4; count++ ) {
  -		b4[count] = (byte)stream.read();
  -	    }
  -	    if (count == 4) {
  -		Object [] encodingDesc = getEncodingName(b4, count);
  -		encoding = (String)(encodingDesc[0]);
  -		isBigEndian = (Boolean)(encodingDesc[1]);
  -
  -		stream.reset();
  -		// Special case UTF-8 files with BOM created by Microsoft
  -		// tools. It's more efficient to consume the BOM than make
  -		// the reader perform extra checks. -Ac
  -		if (count > 2 && encoding.equals("UTF-8")) {
  -		    int b0 = b4[0] & 0xFF;
  -		    int b1 = b4[1] & 0xFF;
  -		    int b2 = b4[2] & 0xFF;
  -		    if (b0 == 0xEF && b1 == 0xBB && b2 == 0xBF) {
  -			// ignore first three bytes...
  -			stream.skip(3);
  -		    }
  -		}
  -		reader = createReader(stream, encoding, isBigEndian);
  -	    } else {
  -		reader = createReader(stream, encoding, isBigEndian);
  -	    }
  -	}
  -    }
  -
  -    // Adapted from:
  -    // org.apache.xerces.impl.XMLEntityManager.createReader
  -    /**
  -     * Creates a reader capable of reading the given input stream in
  -     * the specified encoding.
  -     *
  -     * @param inputStream  The input stream.
  -     * @param encoding     The encoding name that the input stream is
  -     *                     encoded using. If the user has specified that
  -     *                     Java encoding names are allowed, then the
  -     *                     encoding name may be a Java encoding name;
  -     *                     otherwise, it is an ianaEncoding name.
  -     * @param isBigEndian   For encodings (like uCS-4), whose names cannot
  -     *                      specify a byte order, this tells whether the order
  -     *                      is bigEndian. null means unknown or not relevant.
  -     *
  -     * @return Returns a reader.
  -     */
  -    private Reader createReader(InputStream inputStream, String encoding,
  -				Boolean isBigEndian)
  -                throws IOException, JasperException {
  -
  -        // normalize encoding name
  -        if (encoding == null) {
  -            encoding = "UTF-8";
  -        }
  -
  -        // try to use an optimized reader
  -        String ENCODING = encoding.toUpperCase(Locale.ENGLISH);
  -        if (ENCODING.equals("UTF-8")) {
  -            return new UTF8Reader(inputStream, fBufferSize, err);
  -        }
  -        if (ENCODING.equals("US-ASCII")) {
  -            return new ASCIIReader(inputStream, fBufferSize, err);
  -        }
  -        if (ENCODING.equals("ISO-10646-UCS-4")) {
  -            if (isBigEndian != null) {
  -                boolean isBE = isBigEndian.booleanValue();
  -                if (isBE) {
  -                    return new UCSReader(inputStream, UCSReader.UCS4BE);
  -                } else {
  -                    return new UCSReader(inputStream, UCSReader.UCS4LE);
  -                }
  -            } else {
  -                err.jspError("jsp.error.xml.encodingByteOrderUnsupported",
  -			     encoding);
  -            }
  -        }
  -        if (ENCODING.equals("ISO-10646-UCS-2")) {
  -            if (isBigEndian != null) { // sould never happen with this encoding...
  -                boolean isBE = isBigEndian.booleanValue();
  -                if (isBE) {
  -                    return new UCSReader(inputStream, UCSReader.UCS2BE);
  -                } else {
  -                    return new UCSReader(inputStream, UCSReader.UCS2LE);
  -                }
  -            } else {
  -                err.jspError("jsp.error.xml.encodingByteOrderUnsupported",
  -			     encoding);
  -            }
  -        }
  -
  -        // check for valid name
  -        boolean validIANA = XMLChar.isValidIANAEncoding(encoding);
  -        boolean validJava = XMLChar.isValidJavaEncoding(encoding);
  -        if (!validIANA || (fAllowJavaEncodings && !validJava)) {
  -            err.jspError("jsp.error.xml.encodingDeclInvalid", encoding);
  -            // NOTE: AndyH suggested that, on failure, we use ISO Latin 1
  -            //       because every byte is a valid ISO Latin 1 character.
  -            //       It may not translate correctly but if we failed on
  -            //       the encoding anyway, then we're expecting the content
  -            //       of the document to be bad. This will just prevent an
  -            //       invalid UTF-8 sequence to be detected. This is only
  -            //       important when continue-after-fatal-error is turned
  -            //       on. -Ac
  -            encoding = "ISO-8859-1";
  -        }
  -
  -        // try to use a Java reader
  -        String javaEncoding = EncodingMap.getIANA2JavaMapping(ENCODING);
  -        if (javaEncoding == null) {
  -            if (fAllowJavaEncodings) {
  -		javaEncoding = encoding;
  -            } else {
  -                err.jspError("jsp.error.xml.encodingDeclInvalid", encoding);
  -                // see comment above.
  -                javaEncoding = "ISO8859_1";
  -            }
  -        }
  -        return new InputStreamReader(inputStream, javaEncoding);
  -
  -    } // createReader(InputStream,String, Boolean): Reader
  -
  -    // Adapted from:
  -    // org.apache.xerces.impl.XMLEntityManager.getEncodingName
  -    /**
  -     * Returns the IANA encoding name that is auto-detected from
  -     * the bytes specified, with the endian-ness of that encoding where
  -     * appropriate.
  -     *
  -     * @param b4    The first four bytes of the input.
  -     * @param count The number of bytes actually read.
  -     * @return a 2-element array:  the first element, an IANA-encoding string,
  -     *  the second element a Boolean which is true iff the document is big
  -     *  endian, false if it's little-endian, and null if the distinction isn't
  -     *  relevant.
  -     */
  -    private Object[] getEncodingName(byte[] b4, int count) {
  -
  -        if (count < 2) {
  -            return new Object[]{"UTF-8", null};
  -        }
  -
  -        // UTF-16, with BOM
  -        int b0 = b4[0] & 0xFF;
  -        int b1 = b4[1] & 0xFF;
  -        if (b0 == 0xFE && b1 == 0xFF) {
  -            // UTF-16, big-endian
  -            return new Object [] {"UTF-16BE", new Boolean(true)};
  -        }
  -        if (b0 == 0xFF && b1 == 0xFE) {
  -            // UTF-16, little-endian
  -            return new Object [] {"UTF-16LE", new Boolean(false)};
  -        }
  -
  -        // default to UTF-8 if we don't have enough bytes to make a
  -        // good determination of the encoding
  -        if (count < 3) {
  -            return new Object [] {"UTF-8", null};
  -        }
  -
  -        // UTF-8 with a BOM
  -        int b2 = b4[2] & 0xFF;
  -        if (b0 == 0xEF && b1 == 0xBB && b2 == 0xBF) {
  -            return new Object [] {"UTF-8", null};
  -        }
  -
  -        // default to UTF-8 if we don't have enough bytes to make a
  -        // good determination of the encoding
  -        if (count < 4) {
  -            return new Object [] {"UTF-8", null};
  -        }
  -
  -        // other encodings
  -        int b3 = b4[3] & 0xFF;
  -        if (b0 == 0x00 && b1 == 0x00 && b2 == 0x00 && b3 == 0x3C) {
  -            // UCS-4, big endian (1234)
  -            return new Object [] {"ISO-10646-UCS-4", new Boolean(true)};
  -        }
  -        if (b0 == 0x3C && b1 == 0x00 && b2 == 0x00 && b3 == 0x00) {
  -            // UCS-4, little endian (4321)
  -            return new Object [] {"ISO-10646-UCS-4", new Boolean(false)};
  -        }
  -        if (b0 == 0x00 && b1 == 0x00 && b2 == 0x3C && b3 == 0x00) {
  -            // UCS-4, unusual octet order (2143)
  -            // REVISIT: What should this be?
  -            return new Object [] {"ISO-10646-UCS-4", null};
  -        }
  -        if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x00) {
  -            // UCS-4, unusual octect order (3412)
  -            // REVISIT: What should this be?
  -            return new Object [] {"ISO-10646-UCS-4", null};
  -        }
  -        if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x3F) {
  -            // UTF-16, big-endian, no BOM
  -            // (or could turn out to be UCS-2...
  -            // REVISIT: What should this be?
  -            return new Object [] {"UTF-16BE", new Boolean(true)};
  -        }
  -        if (b0 == 0x3C && b1 == 0x00 && b2 == 0x3F && b3 == 0x00) {
  -            // UTF-16, little-endian, no BOM
  -            // (or could turn out to be UCS-2...
  -            return new Object [] {"UTF-16LE", new Boolean(false)};
  -        }
  -        if (b0 == 0x4C && b1 == 0x6F && b2 == 0xA7 && b3 == 0x94) {
  -            // EBCDIC
  -            // a la xerces1, return CP037 instead of EBCDIC here
  -            return new Object [] {"CP037", null};
  -        }
  -
  -        // default encoding
  -        return new Object [] {"UTF-8", null};
  -
  -    }
  -
  -    // Adapted from:
  -    // org.apache.xerces.impl.XMLEntityManager.EntityScanner.isExternal
  -    /** Returns true if the current entity being scanned is external. */
  -    public boolean isExternal() {
  -	return true;
  -    }
  -
  -    // Adapted from:
  -    // org.apache.xerces.impl.XMLEntityManager.EntityScanner.peekChar
  -    /**
  -     * Returns the next character on the input.
  -     * <p>
  -     * <strong>Note:</strong> The character is <em>not</em> consumed.
  -     *
  -     * @throws IOException  Thrown if i/o error occurs.
  -     * @throws EOFException Thrown on end of file.
  -     */
  -    public int peekChar() throws IOException {
  -	
  -	// load more characters, if needed
  -	if (fCurrentEntity.position == fCurrentEntity.count) {
  -	    load(0, true);
  -	}
  -	
  -	// peek at character
  -	int c = fCurrentEntity.ch[fCurrentEntity.position];
  -
  -	// return peeked character
  -	if (fCurrentEntity.isExternal()) {
  -	    return c != '\r' ? c : '\n';
  -	}
  -	else {
  -	    return c;
  -	}
  -	
  -    } // peekChar():int
  -    
  -    // Adapted from:
  -    // org.apache.xerces.impl.XMLEntityManager.EntityScanner.scanChar
  -    /**
  -     * Returns the next character on the input.
  -     * <p>
  -     * <strong>Note:</strong> The character is consumed.
  -     *
  -     * @throws IOException  Thrown if i/o error occurs.
  -     * @throws EOFException Thrown on end of file.
  -     */
  -    public int scanChar() throws IOException {
  -
  -	// load more characters, if needed
  -	if (fCurrentEntity.position == fCurrentEntity.count) {
  -	    load(0, true);
  -	}
  -
  -	// scan character
  -	int c = fCurrentEntity.ch[fCurrentEntity.position++];
  -	boolean external = false;
  -	if (c == '\n' ||
  -	    (c == '\r' && (external = fCurrentEntity.isExternal()))) {
  -	    fCurrentEntity.lineNumber++;
  -	    fCurrentEntity.columnNumber = 1;
  -	    if (fCurrentEntity.position == fCurrentEntity.count) {
  -		fCurrentEntity.ch[0] = (char)c;
  -		load(1, false);
  -	    }
  -	    if (c == '\r' && external) {
  -		if (fCurrentEntity.ch[fCurrentEntity.position++] != '\n') {
  -		    fCurrentEntity.position--;
  -		}
  -		c = '\n';
  -	    }
  -	}
  -
  -	// return character that was scanned
  -	fCurrentEntity.columnNumber++;
  -	return c;
  -	
  -    }
  -
  -    // Adapted from:
  -    // org.apache.xerces.impl.XMLEntityManager.EntityScanner.scanName
  -    /**
  -     * Returns a string matching the Name production appearing immediately
  -     * on the input as a symbol, or null if no Name string is present.
  -     * <p>
  -     * <strong>Note:</strong> The Name characters are consumed.
  -     * <p>
  -     * <strong>Note:</strong> The string returned must be a symbol. The
  -     * SymbolTable can be used for this purpose.
  -     *
  -     * @throws IOException  Thrown if i/o error occurs.
  -     * @throws EOFException Thrown on end of file.
  -     *
  -     * @see org.apache.xerces.util.SymbolTable
  -     * @see org.apache.xerces.util.XMLChar#isName
  -     * @see org.apache.xerces.util.XMLChar#isNameStart
  -     */
  -    public String scanName() throws IOException {
  -	
  -	// load more characters, if needed
  -	if (fCurrentEntity.position == fCurrentEntity.count) {
  -	    load(0, true);
  -	}
  -	
  -	// scan name
  -	int offset = fCurrentEntity.position;
  -	if (XMLChar.isNameStart(fCurrentEntity.ch[offset])) {
  -	    if (++fCurrentEntity.position == fCurrentEntity.count) {
  -		fCurrentEntity.ch[0] = fCurrentEntity.ch[offset];
  -		offset = 0;
  -		if (load(1, false)) {
  -		    fCurrentEntity.columnNumber++;
  -		    String symbol = fSymbolTable.addSymbol(fCurrentEntity.ch,
  -							   0, 1);
  -		    return symbol;
  -		}
  -	    }
  -	    while (XMLChar.isName(fCurrentEntity.ch[fCurrentEntity.position])) {
  -		if (++fCurrentEntity.position == fCurrentEntity.count) {
  -		    int length = fCurrentEntity.position - offset;
  -		    if (length == fBufferSize) {
  -			// bad luck we have to resize our buffer
  -			char[] tmp = new char[fBufferSize * 2];
  -			System.arraycopy(fCurrentEntity.ch, offset,
  -					 tmp, 0, length);
  -			fCurrentEntity.ch = tmp;
  -			fBufferSize *= 2;
  -		    } else {
  -			System.arraycopy(fCurrentEntity.ch, offset,
  -					 fCurrentEntity.ch, 0, length);
  -		    }
  -		    offset = 0;
  -		    if (load(length, false)) {
  -			break;
  -		    }
  -		}
  -	    }
  -	}
  -	int length = fCurrentEntity.position - offset;
  -	fCurrentEntity.columnNumber += length;
  -
  -	// return name
  -	String symbol = null;
  -	if (length > 0) {
  -	    symbol = fSymbolTable.addSymbol(fCurrentEntity.ch, offset, length);
  -	}
  -	return symbol;
  -	
  -    }
  -
  -    // Adapted from:
  -    // org.apache.xerces.impl.XMLEntityManager.EntityScanner.scanLiteral
  -    /**
  -     * Scans a range of attribute value data, setting the fields of the
  -     * XMLString structure, appropriately.
  -     * <p>
  -     * <strong>Note:</strong> The characters are consumed.
  -     * <p>
  -     * <strong>Note:</strong> This method does not guarantee to return
  -     * the longest run of attribute value data. This method may return
  -     * before the quote character due to reaching the end of the input
  -     * buffer or any other reason.
  -     * <p>
  -     * <strong>Note:</strong> The fields contained in the XMLString
  -     * structure are not guaranteed to remain valid upon subsequent calls
  -     * to the entity scanner. Therefore, the caller is responsible for
  -     * immediately using the returned character data or making a copy of
  -     * the character data.
  -     *
  -     * @param quote   The quote character that signifies the end of the
  -     *                attribute value data.
  -     * @param content The content structure to fill.
  -     *
  -     * @return Returns the next character on the input, if known. This
  -     *         value may be -1 but this does <em>note</em> designate
  -     *         end of file.
  -     *
  -     * @throws IOException  Thrown if i/o error occurs.
  -     * @throws EOFException Thrown on end of file.
  -     */
  -    public int scanLiteral(int quote, XMLString content)
  -	throws IOException {
  -
  -	// load more characters, if needed
  -	if (fCurrentEntity.position == fCurrentEntity.count) {
  -	    load(0, true);
  -	} else if (fCurrentEntity.position == fCurrentEntity.count - 1) {
  -	    fCurrentEntity.ch[0] = fCurrentEntity.ch[fCurrentEntity.count - 1];
  -	    load(1, false);
  -	    fCurrentEntity.position = 0;
  -	}
  -
  -	// normalize newlines
  -	int offset = fCurrentEntity.position;
  -	int c = fCurrentEntity.ch[offset];
  -	int newlines = 0;
  -	boolean external = fCurrentEntity.isExternal();
  -	if (c == '\n' || (c == '\r' && external)) {
  -	    do {
  -		c = fCurrentEntity.ch[fCurrentEntity.position++];
  -		if (c == '\r' && external) {
  -		    newlines++;
  -		    fCurrentEntity.lineNumber++;
  -		    fCurrentEntity.columnNumber = 1;
  -		    if (fCurrentEntity.position == fCurrentEntity.count) {
  -			offset = 0;
  -			fCurrentEntity.position = newlines;
  -			if (load(newlines, false)) {
  -			    break;
  -			}
  -		    }
  -		    if (fCurrentEntity.ch[fCurrentEntity.position] == '\n') {
  -			fCurrentEntity.position++;
  -			offset++;
  -		    }
  -		    /*** NEWLINE NORMALIZATION ***/
  -		    else {
  -			newlines++;
  -		    }
  -		    /***/
  -		}
  -		else if (c == '\n') {
  -		    newlines++;
  -		    fCurrentEntity.lineNumber++;
  -		    fCurrentEntity.columnNumber = 1;
  -		    if (fCurrentEntity.position == fCurrentEntity.count) {
  -			offset = 0;
  -			fCurrentEntity.position = newlines;
  -			if (load(newlines, false)) {
  -			    break;
  -			}
  -		    }
  -		    /*** NEWLINE NORMALIZATION ***
  -			 if (fCurrentEntity.ch[fCurrentEntity.position] == '\r'
  -			 && external) {
  -			 fCurrentEntity.position++;
  -			 offset++;
  -			 }
  -			 /***/
  -		}
  -		else {
  -		    fCurrentEntity.position--;
  -		    break;
  -		}
  -	    } while (fCurrentEntity.position < fCurrentEntity.count - 1);
  -	    for (int i = offset; i < fCurrentEntity.position; i++) {
  -		fCurrentEntity.ch[i] = '\n';
  -	    }
  -	    int length = fCurrentEntity.position - offset;
  -	    if (fCurrentEntity.position == fCurrentEntity.count - 1) {
  -		content.setValues(fCurrentEntity.ch, offset, length);
  -		return -1;
  -	    }
  -	}
  -
  -	// scan literal value
  -	while (fCurrentEntity.position < fCurrentEntity.count) {
  -	    c = fCurrentEntity.ch[fCurrentEntity.position++];
  -	    if ((c == quote &&
  -		 (!fCurrentEntity.literal || external))
  -		|| c == '%' || !XMLChar.isContent(c)) {
  -		fCurrentEntity.position--;
  -		break;
  -	    }
  -	}
  -	int length = fCurrentEntity.position - offset;
  -	fCurrentEntity.columnNumber += length - newlines;
  -	content.setValues(fCurrentEntity.ch, offset, length);
  -
  -	// return next character
  -	if (fCurrentEntity.position != fCurrentEntity.count) {
  -	    c = fCurrentEntity.ch[fCurrentEntity.position];
  -	    // NOTE: We don't want to accidentally signal the
  -	    //       end of the literal if we're expanding an
  -	    //       entity appearing in the literal. -Ac
  -	    if (c == quote && fCurrentEntity.literal) {
  -		c = -1;
  -	    }
  -	}
  -	else {
  -	    c = -1;
  -	}
  -	return c;
  -
  -    }
  -
  -    /**
  -     * Scans a range of character data up to the specified delimiter,
  -     * setting the fields of the XMLString structure, appropriately.
  -     * <p>
  -     * <strong>Note:</strong> The characters are consumed.
  -     * <p>
  -     * <strong>Note:</strong> This assumes that the internal buffer is
  -     * at least the same size, or bigger, than the length of the delimiter
  -     * and that the delimiter contains at least one character.
  -     * <p>
  -     * <strong>Note:</strong> This method does not guarantee to return
  -     * the longest run of character data. This method may return before
  -     * the delimiter due to reaching the end of the input buffer or any
  -     * other reason.
  -     * <p>
  -     * <strong>Note:</strong> The fields contained in the XMLString
  -     * structure are not guaranteed to remain valid upon subsequent calls
  -     * to the entity scanner. Therefore, the caller is responsible for
  -     * immediately using the returned character data or making a copy of
  -     * the character data.
  -     *
  -     * @param delimiter The string that signifies the end of the character
  -     *                  data to be scanned.
  -     * @param data      The data structure to fill.
  -     *
  -     * @return Returns true if there is more data to scan, false otherwise.
  -     *
  -     * @throws IOException  Thrown if i/o error occurs.
  -     * @throws EOFException Thrown on end of file.
  -     */
  -    public boolean scanData(String delimiter, XMLStringBuffer buffer)
  -	throws IOException {
  -
  -	boolean done = false;
  -	int delimLen = delimiter.length();
  -	char charAt0 = delimiter.charAt(0);
  -	boolean external = fCurrentEntity.isExternal();
  -	do {
  -    
  -	    // load more characters, if needed
  -    
  -	    if (fCurrentEntity.position == fCurrentEntity.count) {
  -		load(0, true);
  -	    }
  -	    else if (fCurrentEntity.position >= fCurrentEntity.count - delimLen) {
  -		System.arraycopy(fCurrentEntity.ch, fCurrentEntity.position,
  -				 fCurrentEntity.ch, 0, fCurrentEntity.count - fCurrentEntity.position);
  -		load(fCurrentEntity.count - fCurrentEntity.position, false);
  -		fCurrentEntity.position = 0;
  -	    } 
  -	    if (fCurrentEntity.position >= fCurrentEntity.count - delimLen) {
  -		// something must be wrong with the input: e.g., file ends an
  -		// unterminated comment
  -		int length = fCurrentEntity.count - fCurrentEntity.position;
  -		buffer.append (fCurrentEntity.ch, fCurrentEntity.position,
  -			       length); 
  -		fCurrentEntity.columnNumber += fCurrentEntity.count;
  -		fCurrentEntity.position = fCurrentEntity.count;
  -		load(0,true);
  -		return false;
  -	    }
  -    
  -	    // normalize newlines
  -	    int offset = fCurrentEntity.position;
  -	    int c = fCurrentEntity.ch[offset];
  -	    int newlines = 0;
  -	    if (c == '\n' || (c == '\r' && external)) {
  -		do {
  -		    c = fCurrentEntity.ch[fCurrentEntity.position++];
  -		    if (c == '\r' && external) {
  -			newlines++;
  -			fCurrentEntity.lineNumber++;
  -			fCurrentEntity.columnNumber = 1;
  -			if (fCurrentEntity.position == fCurrentEntity.count) {
  -			    offset = 0;
  -			    fCurrentEntity.position = newlines;
  -			    if (load(newlines, false)) {
  -				break;
  -			    }
  -			}
  -			if (fCurrentEntity.ch[fCurrentEntity.position] == '\n') {
  -			    fCurrentEntity.position++;
  -			    offset++;
  -			}
  -			/*** NEWLINE NORMALIZATION ***/
  -			else {
  -			    newlines++;
  -			}
  -		    }
  -		    else if (c == '\n') {
  -			newlines++;
  -			fCurrentEntity.lineNumber++;
  -			fCurrentEntity.columnNumber = 1;
  -			if (fCurrentEntity.position == fCurrentEntity.count) {
  -			    offset = 0;
  -			    fCurrentEntity.position = newlines;
  -			    fCurrentEntity.count = newlines;
  -			    if (load(newlines, false)) {
  -				break;
  -			    }
  -			}
  -		    }
  -		    else {
  -			fCurrentEntity.position--;
  -			break;
  -		    }
  -		} while (fCurrentEntity.position < fCurrentEntity.count - 1);
  -		for (int i = offset; i < fCurrentEntity.position; i++) {
  -		    fCurrentEntity.ch[i] = '\n';
  -		}
  -		int length = fCurrentEntity.position - offset;
  -		if (fCurrentEntity.position == fCurrentEntity.count - 1) {
  -		    buffer.append(fCurrentEntity.ch, offset, length);
  -		    return true;
  -		}
  -	    }
  -    
  -	    // iterate over buffer looking for delimiter
  -	OUTER: while (fCurrentEntity.position < fCurrentEntity.count) {
  -	    c = fCurrentEntity.ch[fCurrentEntity.position++];
  -	    if (c == charAt0) {
  -		// looks like we just hit the delimiter
  -		int delimOffset = fCurrentEntity.position - 1;
  -		for (int i = 1; i < delimLen; i++) {
  -		    if (fCurrentEntity.position == fCurrentEntity.count) {
  -			fCurrentEntity.position -= i;
  -			break OUTER;
  -		    }
  -		    c = fCurrentEntity.ch[fCurrentEntity.position++];
  -		    if (delimiter.charAt(i) != c) {
  -			fCurrentEntity.position--;
  -			break;
  -		    }
  -		}
  -		if (fCurrentEntity.position == delimOffset + delimLen) {
  -		    done = true;
  -		    break;
  -		}
  -	    }
  -	    else if (c == '\n' || (external && c == '\r')) {
  -		fCurrentEntity.position--;
  -		break;
  -	    }
  -	    else if (XMLChar.isInvalid(c)) {
  -		fCurrentEntity.position--;
  -		int length = fCurrentEntity.position - offset;
  -		fCurrentEntity.columnNumber += length - newlines;
  -		buffer.append(fCurrentEntity.ch, offset, length); 
  -		return true;
  -	    }
  -	}
  -	    int length = fCurrentEntity.position - offset;
  -	    fCurrentEntity.columnNumber += length - newlines;
  -	    if (done) {
  -		length -= delimLen;
  -	    }
  -	    buffer.append (fCurrentEntity.ch, offset, length);
  -    
  -	    // return true if string was skipped
  -	} while (!done);
  -	return !done;
  -
  -    }
  -
  -    // Adapted from:
  -    // org.apache.xerces.impl.XMLEntityManager.EntityScanner.skipChar
  -    /**
  -     * Skips a character appearing immediately on the input.
  -     * <p>
  -     * <strong>Note:</strong> The character is consumed only if it matches
  -     * the specified character.
  -     *
  -     * @param c The character to skip.
  -     *
  -     * @return Returns true if the character was skipped.
  -     *
  -     * @throws IOException  Thrown if i/o error occurs.
  -     * @throws EOFException Thrown on end of file.
  -     */
  -    public boolean skipChar(int c) throws IOException {
  -
  -	// load more characters, if needed
  -	if (fCurrentEntity.position == fCurrentEntity.count) {
  -	    load(0, true);
  -	}
  -
  -	// skip character
  -	int cc = fCurrentEntity.ch[fCurrentEntity.position];
  -	if (cc == c) {
  -	    fCurrentEntity.position++;
  -	    if (c == '\n') {
  -		fCurrentEntity.lineNumber++;
  -		fCurrentEntity.columnNumber = 1;
  -	    }
  -	    else {
  -		fCurrentEntity.columnNumber++;
  -	    }
  -	    return true;
  -	} else if (c == '\n' && cc == '\r' && fCurrentEntity.isExternal()) {
  -	    // handle newlines
  -	    if (fCurrentEntity.position == fCurrentEntity.count) {
  -		fCurrentEntity.ch[0] = (char)cc;
  -		load(1, false);
  -	    }
  -	    fCurrentEntity.position++;
  -	    if (fCurrentEntity.ch[fCurrentEntity.position] == '\n') {
  -		fCurrentEntity.position++;
  -	    }
  -	    fCurrentEntity.lineNumber++;
  -	    fCurrentEntity.columnNumber = 1;
  -	    return true;
  -	}
  -
  -	// character was not skipped
  -	return false;
  -
  -    }
  -
  -    // Adapted from:
  -    // org.apache.xerces.impl.XMLEntityManager.EntityScanner.skipSpaces
  -    /**
  -     * Skips space characters appearing immediately on the input.
  -     * <p>
  -     * <strong>Note:</strong> The characters are consumed only if they are
  -     * space characters.
  -     *
  -     * @return Returns true if at least one space character was skipped.
  -     *
  -     * @throws IOException  Thrown if i/o error occurs.
  -     * @throws EOFException Thrown on end of file.
  -     *
  -     * @see org.apache.xerces.util.XMLChar#isSpace
  -     */
  -    public boolean skipSpaces() throws IOException {
  -
  -	// load more characters, if needed
  -	if (fCurrentEntity.position == fCurrentEntity.count) {
  -	    load(0, true);
  -	}
  -
  -	// skip spaces
  -	int c = fCurrentEntity.ch[fCurrentEntity.position];
  -	if (XMLChar.isSpace(c)) {
  -	    boolean external = fCurrentEntity.isExternal();
  -	    do {
  -		boolean entityChanged = false;
  -		// handle newlines
  -		if (c == '\n' || (external && c == '\r')) {
  -		    fCurrentEntity.lineNumber++;
  -		    fCurrentEntity.columnNumber = 1;
  -		    if (fCurrentEntity.position == fCurrentEntity.count - 1) {
  -			fCurrentEntity.ch[0] = (char)c;
  -			entityChanged = load(1, true);
  -			if (!entityChanged)
  -                                // the load change the position to be 1,
  -                                // need to restore it when entity not changed
  -			    fCurrentEntity.position = 0;
  -		    }
  -		    if (c == '\r' && external) {
  -			// REVISIT: Does this need to be updated to fix the
  -			//          #x0D ^#x0A newline normalization problem? -Ac
  -			if (fCurrentEntity.ch[++fCurrentEntity.position] != '\n') {
  -			    fCurrentEntity.position--;
  -			}
  -		    }
  -		    /*** NEWLINE NORMALIZATION ***
  -			 else {
  -			 if (fCurrentEntity.ch[fCurrentEntity.position + 1] == '\r'
  -			 && external) {
  -			 fCurrentEntity.position++;
  -			 }
  -			 }
  -			 /***/
  -		}
  -		else {
  -		    fCurrentEntity.columnNumber++;
  -		}
  -		// load more characters, if needed
  -		if (!entityChanged)
  -		    fCurrentEntity.position++;
  -		if (fCurrentEntity.position == fCurrentEntity.count) {
  -		    load(0, true);
  -		}
  -	    } while (XMLChar.isSpace(c = fCurrentEntity.ch[fCurrentEntity.position]));
  -	    return true;
  -	}
  -
  -	// no spaces were found
  -	return false;
  -
  -    }
  -
  -    /**
  -     * Skips the specified string appearing immediately on the input.
  -     * <p>
  -     * <strong>Note:</strong> The characters are consumed only if they
  -     * match the specified string.
  -     *
  -     * @param s The string to skip.
  -     *
  -     * @return Returns true if the string was skipped.
  -     *
  -     * @throws IOException  Thrown if i/o error occurs.
  -     * @throws EOFException Thrown on end of file.
  -     */
  -    public boolean skipString(String s) throws IOException {
  -
  -	// load more characters, if needed
  -	if (fCurrentEntity.position == fCurrentEntity.count) {
  -	    load(0, true);
  -	}
  -
  -	// skip string
  -	final int length = s.length();
  -	for (int i = 0; i < length; i++) {
  -	    char c = fCurrentEntity.ch[fCurrentEntity.position++];
  -	    if (c != s.charAt(i)) {
  -		fCurrentEntity.position -= i + 1;
  -		return false;
  -	    }
  -	    if (i < length - 1 && fCurrentEntity.position == fCurrentEntity.count) {
  -		System.arraycopy(fCurrentEntity.ch, fCurrentEntity.count - i - 1, fCurrentEntity.ch, 0, i + 1);
  -		// REVISIT: Can a string to be skipped cross an
  -		//          entity boundary? -Ac
  -		if (load(i + 1, false)) {
  -		    fCurrentEntity.position -= i + 1;
  -		    return false;
  -		}
  -	    }
  -	}
  -	fCurrentEntity.columnNumber += length;
  -	return true;
  -
  -    }
  -
  -    // Adapted from:
  -    // org.apache.xerces.impl.XMLEntityManager.EntityScanner.load
  -    /**
  -     * Loads a chunk of text.
  -     *
  -     * @param offset       The offset into the character buffer to
  -     *                     read the next batch of characters.
  -     * @param changeEntity True if the load should change entities
  -     *                     at the end of the entity, otherwise leave
  -     *                     the current entity in place and the entity
  -     *                     boundary will be signaled by the return
  -     *                     value.
  -     *
  -     * @return Returns true if the entity changed as a result of this
  -     *         load operation.
  -     */
  -    final boolean load(int offset, boolean changeEntity)
  -	throws IOException {
  -
  -	// read characters
  -	int length = fCurrentEntity.mayReadChunks?
  -	    (fCurrentEntity.ch.length - offset):
  -	    (DEFAULT_XMLDECL_BUFFER_SIZE);
  -	int count = fCurrentEntity.reader.read(fCurrentEntity.ch, offset,
  -					       length);
  -
  -	// reset count and position
  -	boolean entityChanged = false;
  -	if (count != -1) {
  -	    if (count != 0) {
  -		fCurrentEntity.count = count + offset;
  -		fCurrentEntity.position = offset;
  -	    }
  -	}
  -
  -	// end of this entity
  -	else {
  -	    fCurrentEntity.count = offset;
  -	    fCurrentEntity.position = offset;
  -	    entityChanged = true;
  -	    if (changeEntity) {
  -		endEntity();
  -		if (fCurrentEntity == null) {
  -		    throw new EOFException();
  -		}
  -		// handle the trailing edges
  -		if (fCurrentEntity.position == fCurrentEntity.count) {
  -		    load(0, false);
  -		}
  -	    }
  -	}
  -
  -	return entityChanged;
  -
  -    }
  -
  -    // Adapted from:
  -    // org.apache.xerces.impl.XMLEntityManager.RewindableInputStream
  -    /**
  -     * This class wraps the byte inputstreams we're presented with.
  -     * We need it because java.io.InputStreams don't provide
  -     * functionality to reread processed bytes, and they have a habit
  -     * of reading more than one character when you call their read()
  -     * methods.  This means that, once we discover the true (declared)
  -     * encoding of a document, we can neither backtrack to read the
  -     * whole doc again nor start reading where we are with a new
  -     * reader.
  -     *
  -     * This class allows rewinding an inputStream by allowing a mark
  -     * to be set, and the stream reset to that position.  <strong>The
  -     * class assumes that it needs to read one character per
  -     * invocation when its read() method is invoked, but uses the
  -     * underlying InputStream's read(byte[], offset, length) method--it
  -     * won't buffer data read this way!</strong>
  -     *
  -     * @author Neil Graham, IBM
  -     * @author Glenn Marcy, IBM
  -     */
  -    private final class RewindableInputStream extends InputStream {
  -
  -        private InputStream fInputStream;
  -        private byte[] fData;
  -        private int fStartOffset;
  -        private int fEndOffset;
  -        private int fOffset;
  -        private int fLength;
  -        private int fMark;
  -
  -        public RewindableInputStream(InputStream is) {
  -            fData = new byte[DEFAULT_XMLDECL_BUFFER_SIZE];
  -            fInputStream = is;
  -            fStartOffset = 0;
  -            fEndOffset = -1;
  -            fOffset = 0;
  -            fLength = 0;
  -            fMark = 0;
  -        }
  -
  -        public void setStartOffset(int offset) {
  -            fStartOffset = offset;
  -        }
  -
  -        public void rewind() {
  -            fOffset = fStartOffset;
  -        }
  -
  -        public int read() throws IOException {
  -            int b = 0;
  -            if (fOffset < fLength) {
  -                return fData[fOffset++] & 0xff;
  -            }
  -            if (fOffset == fEndOffset) {
  -                return -1;
  -            }
  -            if (fOffset == fData.length) {
  -                byte[] newData = new byte[fOffset << 1];
  -                System.arraycopy(fData, 0, newData, 0, fOffset);
  -                fData = newData;
  -            }
  -            b = fInputStream.read();
  -            if (b == -1) {
  -                fEndOffset = fOffset;
  -                return -1;
  -            }
  -            fData[fLength++] = (byte)b;
  -            fOffset++;
  -            return b & 0xff;
  -        }
  -
  -        public int read(byte[] b, int off, int len) throws IOException {
  -            int bytesLeft = fLength - fOffset;
  -            if (bytesLeft == 0) {
  -                if (fOffset == fEndOffset) {
  -                    return -1;
  -                }
  -                // better get some more for the voracious reader...
  -                if (fCurrentEntity.mayReadChunks) {
  -                    return fInputStream.read(b, off, len);
  -                }
  -                int returnedVal = read();
  -                if (returnedVal == -1) {
  -                    fEndOffset = fOffset;
  -                    return -1;
  -                }
  -                b[off] = (byte)returnedVal;
  -                return 1;
  -            }
  -            if (len < bytesLeft) {
  -                if (len <= 0) {
  -                    return 0;
  -                }
  -            }
  -            else {
  -                len = bytesLeft;
  -            }
  -            if (b != null) {
  -                System.arraycopy(fData, fOffset, b, off, len);
  -            }
  -            fOffset += len;
  -            return len;
  -        }
  -
  -        public long skip(long n)
  -            throws IOException
  -        {
  -            int bytesLeft;
  -            if (n <= 0) {
  -                return 0;
  -            }
  -            bytesLeft = fLength - fOffset;
  -            if (bytesLeft == 0) {
  -                if (fOffset == fEndOffset) {
  -                    return 0;
  -                }
  -                return fInputStream.skip(n);
  -            }
  -            if (n <= bytesLeft) {
  -                fOffset += n;
  -                return n;
  -            }
  -            fOffset += bytesLeft;
  -            if (fOffset == fEndOffset) {
  -                return bytesLeft;
  -            }
  -            n -= bytesLeft;
  -	    /*
  -	     * In a manner of speaking, when this class isn't permitting more
  -	     * than one byte at a time to be read, it is "blocking".  The
  -	     * available() method should indicate how much can be read without
  -	     * blocking, so while we're in this mode, it should only indicate
  -	     * that bytes in its buffer are available; otherwise, the result of
  -	     * available() on the underlying InputStream is appropriate.
  -	     */
  -            return fInputStream.skip(n) + bytesLeft;
  -        }
  -
  -        public int available() throws IOException {
  -            int bytesLeft = fLength - fOffset;
  -            if (bytesLeft == 0) {
  -                if (fOffset == fEndOffset) {
  -                    return -1;
  -                }
  -                return fCurrentEntity.mayReadChunks ? fInputStream.available()
  -		    : 0;
  -            }
  -            return bytesLeft;
  -        }
  -
  -        public void mark(int howMuch) {
  -            fMark = fOffset;
  -        }
  -
  -        public void reset() {
  -            fOffset = fMark;
  -        }
  -
  -        public boolean markSupported() {
  -            return true;
  -        }
  -
  -        public void close() throws IOException {
  -            if (fInputStream != null) {
  -                fInputStream.close();
  -                fInputStream = null;
  -            }
  -        }
  -    } // end of RewindableInputStream class
  -
  -    // Adapted from:
  -    // org.apache.xerces.impl.XMLDocumentScannerImpl.dispatch
  -    private void scanXMLDecl() throws IOException, JasperException {
  -
  -	if (skipString("<?xml")) {
  -	    fMarkupDepth++;
  -	    // NOTE: special case where document starts with a PI
  -	    //       whose name starts with "xml" (e.g. "xmlfoo")
  -	    if (XMLChar.isName(peekChar())) {
  -		fStringBuffer.clear();
  -		fStringBuffer.append("xml");
  -		while (XMLChar.isName(peekChar())) {
  -		    fStringBuffer.append((char)scanChar());
  -		}
  -		String target = fSymbolTable.addSymbol(fStringBuffer.ch,
  -						       fStringBuffer.offset,
  -						       fStringBuffer.length);
  -		scanPIData(target, fString);
  -	    }
  -
  -	    // standard XML declaration
  -	    else {
  -		scanXMLDeclOrTextDecl(false);
  -	    }
  -	}
  -    }
  -    
  -    // Adapted from:
  -    // org.apache.xerces.impl.XMLDocumentFragmentScannerImpl.scanXMLDeclOrTextDecl
  -    /**
  -     * Scans an XML or text declaration.
  -     * <p>
  -     * <pre>
  -     * [23] XMLDecl ::= '&lt;?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
  -     * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
  -     * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' |  "'" EncName "'" )
  -     * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
  -     * [32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'")
  -     *                 | ('"' ('yes' | 'no') '"'))
  -     *
  -     * [77] TextDecl ::= '&lt;?xml' VersionInfo? EncodingDecl S? '?>'
  -     * </pre>
  -     *
  -     * @param scanningTextDecl True if a text declaration is to
  -     *                         be scanned instead of an XML
  -     *                         declaration.
  -     */
  -    private void scanXMLDeclOrTextDecl(boolean scanningTextDecl) 
  -        throws IOException, JasperException {
  -
  -        // scan decl
  -        scanXMLDeclOrTextDecl(scanningTextDecl, fStrings);
  -        fMarkupDepth--;
  -
  -        // pseudo-attribute values
  -        String encodingPseudoAttr = fStrings[1];
  -
  -        // set encoding on reader
  -        if (encodingPseudoAttr != null) {
  -            isEncodingSetInProlog = true;
  -	    encoding = encodingPseudoAttr;
  -        }
  -    }
  -
  -    // Adapted from:
  -    // org.apache.xerces.impl.XMLScanner.scanXMLDeclOrTextDecl
  -    /**
  -     * Scans an XML or text declaration.
  -     * <p>
  -     * <pre>
  -     * [23] XMLDecl ::= '&lt;?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
  -     * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
  -     * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' |  "'" EncName "'" )
  -     * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
  -     * [32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'")
  -     *                 | ('"' ('yes' | 'no') '"'))
  -     *
  -     * [77] TextDecl ::= '&lt;?xml' VersionInfo? EncodingDecl S? '?>'
  -     * </pre>
  -     *
  -     * @param scanningTextDecl True if a text declaration is to
  -     *                         be scanned instead of an XML
  -     *                         declaration.
  -     * @param pseudoAttributeValues An array of size 3 to return the version,
  -     *                         encoding and standalone pseudo attribute values
  -     *                         (in that order).
  -     *
  -     * <strong>Note:</strong> This method uses fString, anything in it
  -     * at the time of calling is lost.
  -     */
  -    private void scanXMLDeclOrTextDecl(boolean scanningTextDecl,
  -				       String[] pseudoAttributeValues) 
  -                throws IOException, JasperException {
  -
  -        // pseudo-attribute values
  -        String version = null;
  -        String encoding = null;
  -        String standalone = null;
  -
  -        // scan pseudo-attributes
  -        final int STATE_VERSION = 0;
  -        final int STATE_ENCODING = 1;
  -        final int STATE_STANDALONE = 2;
  -        final int STATE_DONE = 3;
  -        int state = STATE_VERSION;
  -
  -        boolean dataFoundForTarget = false;
  -        boolean sawSpace = skipSpaces();
  -        while (peekChar() != '?') {
  -            dataFoundForTarget = true;
  -            String name = scanPseudoAttribute(scanningTextDecl, fString);
  -            switch (state) {
  -                case STATE_VERSION: {
  -                    if (name == fVersionSymbol) {
  -                        if (!sawSpace) {
  -                            reportFatalError(scanningTextDecl
  -                                       ? "jsp.error.xml.spaceRequiredBeforeVersionInTextDecl"
  -                                       : "jsp.error.xml.spaceRequiredBeforeVersionInXMLDecl",
  -                                             null);
  -                        }
  -                        version = fString.toString();
  -                        state = STATE_ENCODING;
  -                        if (!version.equals("1.0")) {
  -                            // REVISIT: XML REC says we should throw an error
  -			    // in such cases.
  -                            // some may object the throwing of fatalError.
  -                            err.jspError("jsp.error.xml.versionNotSupported",
  -					 version);
  -                        }
  -                    } else if (name == fEncodingSymbol) {
  -                        if (!scanningTextDecl) {
  -                            err.jspError("jsp.error.xml.versionInfoRequired");
  -                        }
  -                        if (!sawSpace) {
  -                            reportFatalError(scanningTextDecl
  -                                      ? "jsp.error.xml.spaceRequiredBeforeEncodingInTextDecl"
  -                                      : "jsp.error.xml.spaceRequiredBeforeEncodingInXMLDecl",
  -                                             null);
  -                        }
  -                        encoding = fString.toString();
  -                        state = scanningTextDecl ? STATE_DONE : STATE_STANDALONE;
  -                    } else {
  -                        if (scanningTextDecl) {
  -                            err.jspError("jsp.error.xml.encodingDeclRequired");
  -                        }
  -                        else {
  -                            err.jspError("jsp.error.xml.versionInfoRequired");
  -                        }
  -                    }
  -                    break;
  -                }
  -                case STATE_ENCODING: {
  -                    if (name == fEncodingSymbol) {
  -                        if (!sawSpace) {
  -                            reportFatalError(scanningTextDecl
  -                                      ? "jsp.error.xml.spaceRequiredBeforeEncodingInTextDecl"
  -                                      : "jsp.error.xml.spaceRequiredBeforeEncodingInXMLDecl",
  -                                             null);
  -                        }
  -                        encoding = fString.toString();
  -                        state = scanningTextDecl ? STATE_DONE : STATE_STANDALONE;
  -                        // TODO: check encoding name; set encoding on
  -                        //       entity scanner
  -                    } else if (!scanningTextDecl && name == fStandaloneSymbol) {
  -                        if (!sawSpace) {
  -                            err.jspError("jsp.error.xml.spaceRequiredBeforeStandalone");
  -                        }
  -                        standalone = fString.toString();
  -                        state = STATE_DONE;
  -                        if (!standalone.equals("yes") && !standalone.equals("no")) {
  -                            err.jspError("jsp.error.xml.sdDeclInvalid");
  -                        }
  -                    } else {
  -                        err.jspError("jsp.error.xml.encodingDeclRequired");
  -                    }
  -                    break;
  -                }
  -                case STATE_STANDALONE: {
  -                    if (name == fStandaloneSymbol) {
  -                        if (!sawSpace) {
  -                            err.jspError("jsp.error.xml.spaceRequiredBeforeStandalone");
  -                        }
  -                        standalone = fString.toString();
  -                        state = STATE_DONE;
  -                        if (!standalone.equals("yes") && !standalone.equals("no")) {
  -                            err.jspError("jsp.error.xml.sdDeclInvalid");
  -                        }
  -                    } else {
  -			err.jspError("jsp.error.xml.encodingDeclRequired");
  -                    }
  -                    break;
  -                }
  -                default: {
  -                    err.jspError("jsp.error.xml.noMorePseudoAttributes");
  -                }
  -            }
  -            sawSpace = skipSpaces();
  -        }
  -        // REVISIT: should we remove this error reporting?
  -        if (scanningTextDecl && state != STATE_DONE) {
  -            err.jspError("jsp.error.xml.morePseudoAttributes");
  -        }
  -        
  -        // If there is no data in the xml or text decl then we fail to report
  -	// error for version or encoding info above.
  -        if (scanningTextDecl) {
  -            if (!dataFoundForTarget && encoding == null) {
  -                err.jspError("jsp.error.xml.encodingDeclRequired");
  -            }
  -        } else {
  -            if (!dataFoundForTarget && version == null) {
  -                err.jspError("jsp.error.xml.versionInfoRequired");
  -            }
  -        }
  -
  -        // end
  -        if (!skipChar('?')) {
  -            err.jspError("jsp.error.xml.xmlDeclUnterminated");
  -        }
  -        if (!skipChar('>')) {
  -            err.jspError("jsp.error.xml.xmlDeclUnterminated");
  -
  -        }
  -        
  -        // fill in return array
  -        pseudoAttributeValues[0] = version;
  -        pseudoAttributeValues[1] = encoding;
  -        pseudoAttributeValues[2] = standalone;
  -    }
  -
  -    // Adapted from:
  -    // org.apache.xerces.impl.XMLScanner.scanPseudoAttribute
  -    /**
  -     * Scans a pseudo attribute.
  -     *
  -     * @param scanningTextDecl True if scanning this pseudo-attribute for a
  -     *                         TextDecl; false if scanning XMLDecl. This 
  -     *                         flag is needed to report the correct type of
  -     *                         error.
  -     * @param value            The string to fill in with the attribute 
  -     *                         value.
  -     *
  -     * @return The name of the attribute
  -     *
  -     * <strong>Note:</strong> This method uses fStringBuffer2, anything in it
  -     * at the time of calling is lost.
  -     */
  -    public String scanPseudoAttribute(boolean scanningTextDecl, 
  -                                      XMLString value) 
  -                throws IOException, JasperException {
  -
  -        String name = scanName();
  -        if (name == null) {
  -            err.jspError("jsp.error.xml.pseudoAttrNameExpected");
  -        }
  -        skipSpaces();
  -        if (!skipChar('=')) {
  -            reportFatalError(scanningTextDecl ?
  -			     "jsp.error.xml.eqRequiredInTextDecl"
  -                             : "jsp.error.xml.eqRequiredInXMLDecl",
  -			     name);
  -        }
  -        skipSpaces();
  -        int quote = peekChar();
  -        if (quote != '\'' && quote != '"') {
  -            reportFatalError(scanningTextDecl ?
  -			     "jsp.error.xml.quoteRequiredInTextDecl"
  -                             : "jsp.error.xml.quoteRequiredInXMLDecl" ,
  -			     name);
  -        }
  -        scanChar();
  -        int c = scanLiteral(quote, value);
  -        if (c != quote) {
  -            fStringBuffer2.clear();
  -            do {
  -                fStringBuffer2.append(value);
  -                if (c != -1) {
  -                    if (c == '&' || c == '%' || c == '<' || c == ']') {
  -                        fStringBuffer2.append((char)scanChar());
  -                    }
  -                    else if (XMLChar.isHighSurrogate(c)) {
  -                        scanSurrogates(fStringBuffer2);
  -                    }
  -                    else if (XMLChar.isInvalid(c)) {
  -                        String key = scanningTextDecl
  -                            ? "jsp.error.xml.invalidCharInTextDecl"
  -			    : "jsp.error.xml.invalidCharInXMLDecl";
  -                        reportFatalError(key, Integer.toString(c, 16));
  -                        scanChar();
  -                    }
  -                }
  -                c = scanLiteral(quote, value);
  -            } while (c != quote);
  -            fStringBuffer2.append(value);
  -            value.setValues(fStringBuffer2);
  -        }
  -        if (!skipChar(quote)) {
  -            reportFatalError(scanningTextDecl ?
  -			     "jsp.error.xml.closeQuoteMissingInTextDecl"
  -                             : "jsp.error.xml.closeQuoteMissingInXMLDecl",
  -			     name);
  -        }
  -
  -        // return
  -        return name;
  -
  +        Object result[]=new Object[]{ "UTF8", new Boolean(false) };
  +        return result;
       }
  -    
  -    // Adapted from:
  -    // org.apache.xerces.impl.XMLScanner.scanPIData
  -    /**
  -     * Scans processing instruction data. This is needed to handle the situation
  -     * where a document starts with a processing instruction whose 
  -     * target name <em>starts with</em> "xml". (e.g. xmlfoo)
  -     *
  -     * <strong>Note:</strong> This method uses fStringBuffer, anything in it
  -     * at the time of calling is lost.
  -     *
  -     * @param target The PI target
  -     * @param data The string to fill in with the data
  -     */
  -    private void scanPIData(String target, XMLString data) 
  -        throws IOException, JasperException {
  -
  -        // check target
  -        if (target.length() == 3) {
  -            char c0 = Character.toLowerCase(target.charAt(0));
  -            char c1 = Character.toLowerCase(target.charAt(1));
  -            char c2 = Character.toLowerCase(target.charAt(2));
  -            if (c0 == 'x' && c1 == 'm' && c2 == 'l') {
  -                err.jspError("jsp.error.xml.reservedPITarget");
  -            }
  -        }
  -
  -        // spaces
  -        if (!skipSpaces()) {
  -            if (skipString("?>")) {
  -                // we found the end, there is no data
  -                data.clear();
  -                return;
  -            }
  -            else {
  -                // if there is data there should be some space
  -                err.jspError("jsp.error.xml.spaceRequiredInPI");
  -            }
  -        }
  -
  -        fStringBuffer.clear();
  -        // data
  -        if (scanData("?>", fStringBuffer)) {
  -            do {
  -                int c = peekChar();
  -                if (c != -1) {
  -                    if (XMLChar.isHighSurrogate(c)) {
  -                        scanSurrogates(fStringBuffer);
  -                    } else if (XMLChar.isInvalid(c)) {
  -                        err.jspError("jsp.error.xml.invalidCharInPI",
  -				     Integer.toHexString(c));
  -                        scanChar();
  -                    }
  -                }
  -            } while (scanData("?>", fStringBuffer));
  -        }
  -        data.setValues(fStringBuffer);
  -
  -    }
  -
  -    // Adapted from:
  -    // org.apache.xerces.impl.XMLScanner.scanSurrogates
  -    /**
  -     * Scans surrogates and appends them to the specified buffer.
  -     * <p>
  -     * <strong>Note:</strong> This assumes the current char has already been
  -     * identified as a high surrogate.
  -     *
  -     * @param buf The StringBuffer to append the read surrogates to.
  -     * @return True if it succeeded.
  -     */
  -    private boolean scanSurrogates(XMLStringBuffer buf)
  -        throws IOException, JasperException {
  -
  -        int high = scanChar();
  -        int low = peekChar();
  -        if (!XMLChar.isLowSurrogate(low)) {
  -            err.jspError("jsp.error.xml.invalidCharInContent",
  -			 Integer.toString(high, 16));
  -            return false;
  -        }
  -        scanChar();
  -
  -        // convert surrogates to supplemental character
  -        int c = XMLChar.supplemental((char)high, (char)low);
  -
  -        // supplemental character must be a valid XML character
  -        if (!XMLChar.isValid(c)) {
  -            err.jspError("jsp.error.xml.invalidCharInContent",
  -			 Integer.toString(c, 16)); 
  -            return false;
  -        }
  -
  -        // fill in the buffer
  -        buf.append((char)high);
  -        buf.append((char)low);
  -
  -        return true;
  -
  -    }
  -
  -    // Adapted from:
  -    // org.apache.xerces.impl.XMLScanner.reportFatalError
  -    /**
  -     * Convenience function used in all XML scanners.
  -     */
  -    private void reportFatalError(String msgId, String arg)
  -                throws JasperException {
  -        err.jspError(msgId, arg);
  -    }
  -
   }
   
   
  
  
  
  1.1                  jakarta-tomcat-jasper/jasper2/src/share/org/apache/jasper/xmlparser/XercesEncodingDetector.java
  
  Index: XercesEncodingDetector.java
  ===================================================================
  /*
   * The Apache Software License, Version 1.1
   *
   *
   * Copyright (c) 2000-2002 The Apache Software Foundation.  All rights
   * reserved.
   *
   * Redistribution and use in source and binary forms, with or without
   * modification, are permitted provided that the following conditions
   * are met:
   *
   * 1. Redistributions of source code must retain the above copyright
   *    notice, this list of conditions and the following disclaimer.
   *
   * 2. Redistributions in binary form must reproduce the above copyright
   *    notice, this list of conditions and the following disclaimer in
   *    the documentation and/or other materials provided with the
   *    distribution.
   *
   * 3. The end-user documentation included with the redistribution,
   *    if any, must include the following acknowledgment:
   *       "This product includes software developed by the
   *        Apache Software Foundation (http://www.apache.org/)."
   *    Alternately, this acknowledgment may appear in the software itself,
   *    if and wherever such third-party acknowledgments normally appear.
   *
   * 4. The names "Xerces" and "Apache Software Foundation" must
   *    not be used to endorse or promote products derived from this
   *    software without prior written permission. For written
   *    permission, please contact apache@apache.org.
   *
   * 5. Products derived from this software may not be called "Apache",
   *    nor may "Apache" appear in their name, without prior written
   *    permission of the Apache Software Foundation.
   *
   * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
   * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
   * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
   * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
   * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
   * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
   * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
   * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
   * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   * SUCH DAMAGE.
   * ====================================================================
   *
   * This software consists of voluntary contributions made by many
   * individuals on behalf of the Apache Software Foundation and was
   * originally based on software copyright (c) 1999, International
   * Business Machines, Inc., http://www.apache.org.  For more
   * information on the Apache Software Foundation, please see
   * <http://www.apache.org/>.
   */
  
  package org.apache.jasper.xmlparser;
  
  import java.io.EOFException;
  import java.io.InputStream;
  import java.io.InputStreamReader;
  import java.io.IOException;
  import java.io.Reader;
  import java.util.Locale;
  import java.util.jar.JarFile;
  
  import org.apache.jasper.JasperException;
  import org.apache.jasper.JspCompilationContext;
  import org.apache.jasper.compiler.ErrorDispatcher;
  import org.apache.jasper.compiler.JspUtil;
  
  import org.apache.xerces.util.EncodingMap;
  import org.apache.xerces.util.SymbolTable;
  import org.apache.xerces.util.XMLChar;
  import org.apache.xerces.util.XMLStringBuffer;
  import org.apache.xerces.xni.XMLString;
  
  public class XercesEncodingDetector extends XMLEncodingDetector {
      
      // Byte stream under examination; createInitialReader() replaces it
      // with a RewindableInputStream wrapper around the original.
      private InputStream stream;
      // Encoding name; returned as the first element of getEncoding()'s result.
      private String encoding;
      // Returned (boxed) as the second element of getEncoding()'s result.
      private boolean isEncodingSetInProlog;
      // Byte order reported by getEncodingName(); null when unknown or
      // not relevant for the detected encoding.
      private Boolean isBigEndian;
      // Character reader produced by createReader().
      private Reader reader;
      
      // org.apache.xerces.impl.XMLEntityManager fields
      public static final int DEFAULT_BUFFER_SIZE = 2048;
      public static final int DEFAULT_XMLDECL_BUFFER_SIZE = 64;
      // When true, createReader() passes an unmapped encoding name straight
      // through as a Java encoding name instead of reporting an error.
      private boolean fAllowJavaEncodings;
      // Used by scanName() to turn scanned names into symbols.
      private SymbolTable fSymbolTable;
      // The constructor points this at the detector itself; the scanning
      // methods reach all buffer state through it.
      private XercesEncodingDetector fCurrentEntity;
      // Size handed to UTF8Reader/ASCIIReader; doubled by scanName() when a
      // single name fills the whole character buffer.
      private int fBufferSize = DEFAULT_BUFFER_SIZE;
      
      // org.apache.xerces.impl.XMLEntityManager.ScannedEntity fields
      // Line/column bookkeeping maintained by the scan*/skip* methods.
      private int lineNumber = 1;
      private int columnNumber = 1;
      // Consulted by scanLiteral() when deciding whether a quote character
      // terminates the literal.
      private boolean literal;
      // Character buffer, index of the next character to read, and the index
      // at which the scanning methods consider the buffer exhausted
      // (they call load() when position == count).
      private char[] ch = new char[DEFAULT_BUFFER_SIZE];
      private int position;
      private int count;
      private boolean mayReadChunks = false;
      
      // org.apache.xerces.impl.XMLScanner fields
      private XMLString fString = new XMLString();    
      private XMLStringBuffer fStringBuffer = new XMLStringBuffer();
      private XMLStringBuffer fStringBuffer2 = new XMLStringBuffer();
      private final static String fVersionSymbol = "version";
      private final static String fEncodingSymbol = "encoding";
      private final static String fStandaloneSymbol = "standalone";
      
      // org.apache.xerces.impl.XMLDocumentFragmentScannerImpl fields
      private int fMarkupDepth = 0;
      private String[] fStrings = new String[3];
  
      // Receives error reports, e.g. from createReader().
      private ErrorDispatcher err;
      
      /**
       * Autodetects the encoding of the XML document supplied by the given
       * input stream.
       *
       * Encoding autodetection is done according to the XML 1.0 specification,
       * Appendix F.1: Detection Without External Encoding Information.
       *
       * The stream is not closed by this method.
       *
       * @param in The input stream to read
       * @param err The error dispatcher
       *
       * @return Two-element array, where the first element (of type
       * java.lang.String) contains the name of the autodetected encoding, and
       * the second element (of type java.lang.Boolean) specifies whether the 
       * encoding was specified by the encoding attribute of an XML declaration
       * (prolog).
       *
       * @throws IOException if reading from the stream fails
       * @throws JasperException if an error is reported while detecting
       */
      public Object[] getEncoding(InputStream in, ErrorDispatcher err)
  	throws IOException, JasperException
      {
  	// NOTE(review): detection state (encoding, buffer position, reader)
  	// lives in this instance and is not reset here, so each call appears
  	// to need a fresh detector - confirm against the callers.
          this.stream = in;
          this.err = err;
  	createInitialReader();
  	scanXMLDecl();
  	
  	// use the shared Boolean constants rather than allocating a new
  	// Boolean for every call
  	return new Object[] { encoding,
  			      isEncodingSetInProlog ? Boolean.TRUE
  						    : Boolean.FALSE };
      }
  
      /**
       * Opens the named resource through JspUtil.getInputStream, autodetects
       * its encoding with {@link #getEncoding} and closes the stream again.
       *
       * @param fname   name of the resource to examine
       * @param jarFile jar file handed to JspUtil.getInputStream
       * @param ctxt    compilation context handed to JspUtil.getInputStream
       * @param err     the error dispatcher
       *
       * @return the two-element array produced by getEncoding
       *
       * @throws IOException if reading or closing the stream fails
       * @throws JasperException if an error is reported while detecting
       */
      public Object[] getEncodingMethod(String fname, JarFile jarFile,
  				       JspCompilationContext ctxt,
  				       ErrorDispatcher err)
  	throws IOException, JasperException
      {
  	InputStream inStream = JspUtil.getInputStream(fname, jarFile,
  						      ctxt, err);
  	try {
  	    return getEncoding(inStream, err);
  	} finally {
  	    // release the stream even when detection fails with an exception
  	    inStream.close();
  	}
      }
  	
      /**
       * Creates a detector bound to the given stream and error dispatcher.
       * A fresh symbol table is allocated and the detector is registered as
       * its own current entity.
       *
       * @param stream the input stream to examine
       * @param err    the error dispatcher
       */
      public XercesEncodingDetector(InputStream stream, ErrorDispatcher err) {
  	this.fCurrentEntity = this;
  	this.fSymbolTable = new SymbolTable();
  	this.err = err;
  	this.stream = stream;
      }
      
      // stub method: intentionally empty.
      // NOTE(review): presumably kept so code adapted from Xerces that signals
      // the end of an entity still has something to call - confirm in load().
      void endEntity() {
      }
      
      // Adapted from:
      // org.apache.xerces.impl.XMLEntityManager.startEntity()
      /**
       * Wraps the input in a RewindableInputStream and, when no encoding is
       * known yet, inspects up to the first four bytes of the stream to choose
       * an encoding and create the matching reader.
       *
       * Fewer than four bytes may be available; in that case detection runs
       * on the bytes actually read, so the end-of-stream marker returned by
       * read() is never mistaken for a 0xFF data byte.
       *
       * @throws IOException if reading from the stream fails
       * @throws JasperException if createReader reports an error
       */
      private void createInitialReader() throws IOException, JasperException {
  
  	// wrap this stream in RewindableInputStream
  	stream = new RewindableInputStream(stream);
  
  	// perform auto-detect of encoding if necessary
  	if (encoding == null) {
  	    // read up to the first four bytes, stopping at end of stream
  	    final byte[] b4 = new byte[4];
  	    int count = 0;
  	    while (count < 4) {
  		int b = stream.read();
  		if (b == -1) {
  		    break;
  		}
  		b4[count++] = (byte)b;
  	    }
  
  	    // getEncodingName() falls back to UTF-8 whenever too few bytes
  	    // are available for a pattern to match
  	    Object [] encodingDesc = getEncodingName(b4, count);
  	    encoding = (String)(encodingDesc[0]);
  	    isBigEndian = (Boolean)(encodingDesc[1]);
  
  	    // go back to the start of the stream before handing it to a reader
  	    stream.reset();
  	    // Special case UTF-8 files with BOM created by Microsoft
  	    // tools. It's more efficient to consume the BOM than make
  	    // the reader perform extra checks. -Ac
  	    if (count > 2 && encoding.equals("UTF-8")) {
  		int b0 = b4[0] & 0xFF;
  		int b1 = b4[1] & 0xFF;
  		int b2 = b4[2] & 0xFF;
  		if (b0 == 0xEF && b1 == 0xBB && b2 == 0xBF) {
  		    // ignore first three bytes...
  		    stream.skip(3);
  		}
  	    }
  	    reader = createReader(stream, encoding, isBigEndian);
  	}
      }
  
      // Adapted from:
      // org.apache.xerces.impl.XMLEntityManager.createReader
      /**
       * Builds a reader that decodes the given input stream using the named
       * encoding.
       *
       * UTF-8, US-ASCII, UCS-4 and UCS-2 are served by dedicated readers;
       * every other name is validated, mapped from its IANA name to a Java
       * encoding name and handed to an InputStreamReader.
       *
       * @param inputStream  The input stream.
       * @param encoding     The encoding name that the input stream is
       *                     encoded using; null is treated as UTF-8. If
       *                     Java encoding names are allowed, the name may
       *                     be a Java encoding name; otherwise it is an
       *                     IANA encoding name.
       * @param isBigEndian  For encodings (like UCS-4) whose names cannot
       *                     specify a byte order, this tells whether the
       *                     order is big-endian. null means unknown or not
       *                     relevant.
       *
       * @return Returns a reader.
       *
       * @throws IOException if the reader cannot be created
       * @throws JasperException if the error dispatcher raises one
       */
      private Reader createReader(InputStream inputStream, String encoding,
  				Boolean isBigEndian)
                  throws IOException, JasperException {
  
          // a missing name means UTF-8
          if (encoding == null) {
              encoding = "UTF-8";
          }
  
          final String upperName = encoding.toUpperCase(Locale.ENGLISH);
  
          // encodings that have a dedicated reader
          if (upperName.equals("UTF-8")) {
              return new UTF8Reader(inputStream, fBufferSize, err);
          }
          if (upperName.equals("US-ASCII")) {
              return new ASCIIReader(inputStream, fBufferSize, err);
          }
          if (upperName.equals("ISO-10646-UCS-4")) {
              if (isBigEndian == null) {
                  // byte order unknown: report, then fall through to the
                  // generic handling below if the dispatcher returns
                  err.jspError("jsp.error.xml.encodingByteOrderUnsupported",
  			     encoding);
              } else if (isBigEndian.booleanValue()) {
                  return new UCSReader(inputStream, UCSReader.UCS4BE);
              } else {
                  return new UCSReader(inputStream, UCSReader.UCS4LE);
              }
          }
          if (upperName.equals("ISO-10646-UCS-2")) {
              if (isBigEndian == null) {
                  err.jspError("jsp.error.xml.encodingByteOrderUnsupported",
  			     encoding);
              } else if (isBigEndian.booleanValue()) {
                  // a known byte order should never happen with this
                  // encoding...
                  return new UCSReader(inputStream, UCSReader.UCS2BE);
              } else {
                  return new UCSReader(inputStream, UCSReader.UCS2LE);
              }
          }
  
          // validate the encoding name
          final boolean ianaOk = XMLChar.isValidIANAEncoding(encoding);
          final boolean javaOk = XMLChar.isValidJavaEncoding(encoding);
          final boolean nameAccepted =
              ianaOk && (!fAllowJavaEncodings || javaOk);
          if (!nameAccepted) {
              err.jspError("jsp.error.xml.encodingDeclInvalid", encoding);
              // NOTE: AndyH suggested that, on failure, we use ISO Latin 1
              //       because every byte is a valid ISO Latin 1 character.
              //       It may not translate correctly but if we failed on
              //       the encoding anyway, then we're expecting the content
              //       of the document to be bad. This will just prevent an
              //       invalid UTF-8 sequence from being detected. This is
              //       only important when continue-after-fatal-error is
              //       turned on. -Ac
              encoding = "ISO-8859-1";
          }
  
          // map the IANA name to a Java encoding name
          String javaName = EncodingMap.getIANA2JavaMapping(upperName);
          if (javaName == null) {
              if (!fAllowJavaEncodings) {
                  err.jspError("jsp.error.xml.encodingDeclInvalid", encoding);
                  // same ISO Latin 1 fallback as above
                  javaName = "ISO8859_1";
              } else {
                  javaName = encoding;
              }
          }
          return new InputStreamReader(inputStream, javaName);
  
      } // createReader(InputStream,String, Boolean): Reader
  
      // Adapted from:
      // org.apache.xerces.impl.XMLEntityManager.getEncodingName
      /**
       * Returns the IANA encoding name that is auto-detected from
       * the bytes specified, with the endian-ness of that encoding where
       * appropriate.
       *
       * Whenever too few bytes are available for a pattern to be checked,
       * or no pattern matches, the result is UTF-8 with a null byte order.
       *
       * @param b4    The first four bytes of the input.
       * @param count The number of bytes actually read.
       * @return a 2-element array:  the first element, an IANA-encoding string,
       *  the second element a Boolean which is true iff the document is big
       *  endian, false if it's little-endian, and null if the distinction isn't
       *  relevant.
       */
      private Object[] getEncodingName(byte[] b4, int count) {
  
          if (count < 2) {
              return new Object[]{"UTF-8", null};
          }
  
          // UTF-16, with BOM
          int b0 = b4[0] & 0xFF;
          int b1 = b4[1] & 0xFF;
          if (b0 == 0xFE && b1 == 0xFF) {
              // UTF-16, big-endian
              return new Object [] {"UTF-16BE", Boolean.TRUE};
          }
          if (b0 == 0xFF && b1 == 0xFE) {
              // UTF-16, little-endian
              return new Object [] {"UTF-16LE", Boolean.FALSE};
          }
  
          // default to UTF-8 if we don't have enough bytes to make a
          // good determination of the encoding
          if (count < 3) {
              return new Object [] {"UTF-8", null};
          }
  
          // UTF-8 with a BOM
          int b2 = b4[2] & 0xFF;
          if (b0 == 0xEF && b1 == 0xBB && b2 == 0xBF) {
              return new Object [] {"UTF-8", null};
          }
  
          // default to UTF-8 if we don't have enough bytes to make a
          // good determination of the encoding
          if (count < 4) {
              return new Object [] {"UTF-8", null};
          }
  
          // other encodings: the remaining patterns match the encoded form
          // of a leading '<' (0x3C), optionally followed by '?' (0x3F)
          int b3 = b4[3] & 0xFF;
          if (b0 == 0x00 && b1 == 0x00 && b2 == 0x00 && b3 == 0x3C) {
              // UCS-4, big endian (1234)
              return new Object [] {"ISO-10646-UCS-4", Boolean.TRUE};
          }
          if (b0 == 0x3C && b1 == 0x00 && b2 == 0x00 && b3 == 0x00) {
              // UCS-4, little endian (4321)
              return new Object [] {"ISO-10646-UCS-4", Boolean.FALSE};
          }
          if (b0 == 0x00 && b1 == 0x00 && b2 == 0x3C && b3 == 0x00) {
              // UCS-4, unusual octet order (2143)
              // REVISIT: What should this be?
              return new Object [] {"ISO-10646-UCS-4", null};
          }
          if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x00) {
              // UCS-4, unusual octet order (3412)
              // REVISIT: What should this be?
              return new Object [] {"ISO-10646-UCS-4", null};
          }
          if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x3F) {
              // UTF-16, big-endian, no BOM
              // (or could turn out to be UCS-2...)
              // REVISIT: What should this be?
              return new Object [] {"UTF-16BE", Boolean.TRUE};
          }
          if (b0 == 0x3C && b1 == 0x00 && b2 == 0x3F && b3 == 0x00) {
              // UTF-16, little-endian, no BOM
              // (or could turn out to be UCS-2...)
              return new Object [] {"UTF-16LE", Boolean.FALSE};
          }
          if (b0 == 0x4C && b1 == 0x6F && b2 == 0xA7 && b3 == 0x94) {
              // EBCDIC
              // a la xerces1, return CP037 instead of EBCDIC here
              return new Object [] {"CP037", null};
          }
  
          // default encoding
          return new Object [] {"UTF-8", null};
  
      }
  
      // Adapted from:
      // org.apache.xerces.impl.XMLEntityManager.EntityScanner.isExternal
      /**
       * Returns true if the current entity being scanned is external.
       * This implementation always answers true, so the scanning methods
       * always apply their carriage-return handling.
       */
      public boolean isExternal() {
  	return true;
      }
  
      // Adapted from:
      // org.apache.xerces.impl.XMLEntityManager.EntityScanner.peekChar
      /**
       * Looks at the next character on the input without consuming it.
       * <p>
       * For an external entity a carriage return is reported as a line feed.
       *
       * @return the character at the current buffer position
       *
       * @throws IOException  Thrown if i/o error occurs.
       * @throws EOFException Thrown on end of file (presumably raised by
       *                      load(), which is responsible for refilling).
       */
      public int peekChar() throws IOException {
  
  	// refill the buffer once everything buffered has been consumed
  	if (fCurrentEntity.position == fCurrentEntity.count) {
  	    load(0, true);
  	}
  
  	final int next = fCurrentEntity.ch[fCurrentEntity.position];
  
  	// only external entities get the carriage-return translation
  	if (!fCurrentEntity.isExternal()) {
  	    return next;
  	}
  	return next == '\r' ? '\n' : next;
  
      } // peekChar():int
      
      // Adapted from:
      // org.apache.xerces.impl.XMLEntityManager.EntityScanner.scanChar
      /**
       * Returns the next character on the input.
       * <p>
       * <strong>Note:</strong> The character is consumed. For an external
       * entity a carriage return, together with a line feed that directly
       * follows it, is consumed as one unit and returned as a single
       * line feed.
       *
       * @return the consumed character
       *
       * @throws IOException  Thrown if i/o error occurs.
       * @throws EOFException Thrown on end of file.
       */
      public int scanChar() throws IOException {
  
  	// load more characters, if needed
  	if (fCurrentEntity.position == fCurrentEntity.count) {
  	    load(0, true);
  	}
  
  	// scan character
  	int c = fCurrentEntity.ch[fCurrentEntity.position++];
  	// 'external' is only assigned when c is a carriage return
  	boolean external = false;
  	if (c == '\n' ||
  	    (c == '\r' && (external = fCurrentEntity.isExternal()))) {
  	    fCurrentEntity.lineNumber++;
  	    fCurrentEntity.columnNumber = 1;
  	    // the newline was the last buffered character: keep it in slot 0
  	    // and load more input behind it so the character after it can be
  	    // inspected below
  	    if (fCurrentEntity.position == fCurrentEntity.count) {
  		fCurrentEntity.ch[0] = (char)c;
  		load(1, false);
  	    }
  	    if (c == '\r' && external) {
  		// swallow a line feed directly after the carriage return;
  		// anything else is pushed back
  		if (fCurrentEntity.ch[fCurrentEntity.position++] != '\n') {
  		    fCurrentEntity.position--;
  		}
  		c = '\n';
  	    }
  	}
  
  	// return character that was scanned
  	fCurrentEntity.columnNumber++;
  	return c;
  	
      }
  
      // Adapted from:
      // org.apache.xerces.impl.XMLEntityManager.EntityScanner.scanName
      /**
       * Returns a string matching the Name production appearing immediately
       * on the input as a symbol, or null if no Name string is present.
       * <p>
       * <strong>Note:</strong> The Name characters are consumed.
       * <p>
       * <strong>Note:</strong> The string returned must be a symbol. The
       * SymbolTable can be used for this purpose.
       *
       * @return the symbol for the scanned name, or null when the character
       *         at the current position cannot start a name
       *
       * @throws IOException  Thrown if i/o error occurs.
       * @throws EOFException Thrown on end of file.
       *
       * @see SymbolTable
       * @see XMLChar#isName
       * @see XMLChar#isNameStart
       */
      public String scanName() throws IOException {
  	
  	// load more characters, if needed
  	if (fCurrentEntity.position == fCurrentEntity.count) {
  	    load(0, true);
  	}
  	
  	// scan name
  	int offset = fCurrentEntity.position;
  	if (XMLChar.isNameStart(fCurrentEntity.ch[offset])) {
  	    // the name-start character was the last one buffered: move it to
  	    // the front and load more input behind it
  	    if (++fCurrentEntity.position == fCurrentEntity.count) {
  		fCurrentEntity.ch[0] = fCurrentEntity.ch[offset];
  		offset = 0;
  		// a true result from load() ends the name here, so it
  		// consists of that single character
  		if (load(1, false)) {
  		    fCurrentEntity.columnNumber++;
  		    String symbol = fSymbolTable.addSymbol(fCurrentEntity.ch,
  							   0, 1);
  		    return symbol;
  		}
  	    }
  	    while (XMLChar.isName(fCurrentEntity.ch[fCurrentEntity.position])) {
  		// buffer exhausted in the middle of the name
  		if (++fCurrentEntity.position == fCurrentEntity.count) {
  		    int length = fCurrentEntity.position - offset;
  		    if (length == fBufferSize) {
  			// bad luck we have to resize our buffer
  			char[] tmp = new char[fBufferSize * 2];
  			System.arraycopy(fCurrentEntity.ch, offset,
  					 tmp, 0, length);
  			fCurrentEntity.ch = tmp;
  			fBufferSize *= 2;
  		    } else {
  			// shift the partial name to the front of the buffer
  			System.arraycopy(fCurrentEntity.ch, offset,
  					 fCurrentEntity.ch, 0, length);
  		    }
  		    offset = 0;
  		    // load more input after the partial name
  		    if (load(length, false)) {
  			break;
  		    }
  		}
  	    }
  	}
  	int length = fCurrentEntity.position - offset;
  	fCurrentEntity.columnNumber += length;
  
  	// return name
  	String symbol = null;
  	if (length > 0) {
  	    symbol = fSymbolTable.addSymbol(fCurrentEntity.ch, offset, length);
  	}
  	return symbol;
  	
      }
  
      // Adapted from:
      // org.apache.xerces.impl.XMLEntityManager.EntityScanner.scanLiteral
      /**
       * Scans a range of attribute value data, setting the fields of the
       * XMLString structure, appropriately.
       * <p>
       * <strong>Note:</strong> The characters are consumed.
       * <p>
       * <strong>Note:</strong> This method does not guarantee to return
       * the longest run of attribute value data. This method may return
       * before the quote character due to reaching the end of the input
       * buffer or any other reason.
       * <p>
       * <strong>Note:</strong> The fields contained in the XMLString
       * structure are not guaranteed to remain valid upon subsequent calls
       * to the entity scanner. Therefore, the caller is responsible for
       * immediately using the returned character data or making a copy of
       * the character data.
       *
       * @param quote   The quote character that signifies the end of the
       *                attribute value data.
       * @param content The content structure to fill.
       *
       * @return Returns the next character on the input, if known. This
       *         value may be -1 but this does <em>not</em> designate
       *         end of file.
       *
       * @throws IOException  Thrown if i/o error occurs.
       * @throws EOFException Thrown on end of file.
       */
      public int scanLiteral(int quote, XMLString content)
  	throws IOException {
  
  	// load more characters, if needed
  	if (fCurrentEntity.position == fCurrentEntity.count) {
  	    load(0, true);
  	} else if (fCurrentEntity.position == fCurrentEntity.count - 1) {
  	    // only one character left: move it to the front and load more
  	    // input behind it
  	    fCurrentEntity.ch[0] = fCurrentEntity.ch[fCurrentEntity.count - 1];
  	    load(1, false);
  	    fCurrentEntity.position = 0;
  	}
  
  	// normalize newlines
  	int offset = fCurrentEntity.position;
  	int c = fCurrentEntity.ch[offset];
  	int newlines = 0;
  	boolean external = fCurrentEntity.isExternal();
  	if (c == '\n' || (c == '\r' && external)) {
  	    // consume the leading run of newline characters
  	    do {
  		c = fCurrentEntity.ch[fCurrentEntity.position++];
  		if (c == '\r' && external) {
  		    newlines++;
  		    fCurrentEntity.lineNumber++;
  		    fCurrentEntity.columnNumber = 1;
  		    if (fCurrentEntity.position == fCurrentEntity.count) {
  			offset = 0;
  			fCurrentEntity.position = newlines;
  			if (load(newlines, false)) {
  			    break;
  			}
  		    }
  		    // a line feed right after the carriage return is dropped
  		    // from the reported range by advancing offset past it
  		    if (fCurrentEntity.ch[fCurrentEntity.position] == '\n') {
  			fCurrentEntity.position++;
  			offset++;
  		    }
  		    /*** NEWLINE NORMALIZATION ***/
  		    else {
  			newlines++;
  		    }
  		    /***/
  		}
  		else if (c == '\n') {
  		    newlines++;
  		    fCurrentEntity.lineNumber++;
  		    fCurrentEntity.columnNumber = 1;
  		    if (fCurrentEntity.position == fCurrentEntity.count) {
  			offset = 0;
  			fCurrentEntity.position = newlines;
  			if (load(newlines, false)) {
  			    break;
  			}
  		    }
  		    /*** NEWLINE NORMALIZATION ***
  			 if (fCurrentEntity.ch[fCurrentEntity.position] == '\r'
  			 && external) {
  			 fCurrentEntity.position++;
  			 offset++;
  			 }
  			 /***/
  		}
  		else {
  		    // not a newline: push it back and stop
  		    fCurrentEntity.position--;
  		    break;
  		}
  	    } while (fCurrentEntity.position < fCurrentEntity.count - 1);
  	    // rewrite the consumed range as line feeds
  	    for (int i = offset; i < fCurrentEntity.position; i++) {
  		fCurrentEntity.ch[i] = '\n';
  	    }
  	    int length = fCurrentEntity.position - offset;
  	    // the run reached the last buffered character: hand back what was
  	    // gathered and report the next character as unknown (-1)
  	    if (fCurrentEntity.position == fCurrentEntity.count - 1) {
  		content.setValues(fCurrentEntity.ch, offset, length);
  		return -1;
  	    }
  	}
  
  	// scan literal value
  	// stops, without consuming it, at the closing quote, at '%', or at a
  	// character rejected by XMLChar.isContent
  	while (fCurrentEntity.position < fCurrentEntity.count) {
  	    c = fCurrentEntity.ch[fCurrentEntity.position++];
  	    if ((c == quote &&
  		 (!fCurrentEntity.literal || external))
  		|| c == '%' || !XMLChar.isContent(c)) {
  		fCurrentEntity.position--;
  		break;
  	    }
  	}
  	int length = fCurrentEntity.position - offset;
  	fCurrentEntity.columnNumber += length - newlines;
  	content.setValues(fCurrentEntity.ch, offset, length);
  
  	// return next character
  	if (fCurrentEntity.position != fCurrentEntity.count) {
  	    c = fCurrentEntity.ch[fCurrentEntity.position];
  	    // NOTE: We don't want to accidentally signal the
  	    //       end of the literal if we're expanding an
  	    //       entity appearing in the literal. -Ac
  	    if (c == quote && fCurrentEntity.literal) {
  		c = -1;
  	    }
  	}
  	else {
  	    // buffer exhausted: the next character is not known yet
  	    c = -1;
  	}
  	return c;
  
      }
  
      /**
       * Scans a range of character data up to the specified delimiter,
       * appending the scanned characters to the given buffer.
       * <p>
       * <strong>Note:</strong> The characters are consumed.
       * <p>
       * <strong>Note:</strong> This assumes that the internal buffer is
       * at least the same size, or bigger, than the length of the delimiter
       * and that the delimiter contains at least one character.
       * <p>
       * <strong>Note:</strong> This method does not guarantee to return
       * the longest run of character data. This method may return before
       * the delimiter due to reaching the end of the input buffer or any
       * other reason.
       *
       * @param delimiter The string that signifies the end of the character
       *                  data to be scanned.
       * @param buffer    The buffer that receives the scanned characters;
       *                  the delimiter itself is not appended.
       *
       * @return Returns true if there is more data to scan, false otherwise.
       *
       * @throws IOException  Thrown if i/o error occurs.
       * @throws EOFException Thrown on end of file.
       */
      public boolean scanData(String delimiter, XMLStringBuffer buffer)
  	throws IOException {
  
  	boolean done = false;
  	int delimLen = delimiter.length();
  	char charAt0 = delimiter.charAt(0);
  	boolean external = fCurrentEntity.isExternal();
  	do {
      
  	    // load more characters, if needed
      
  	    if (fCurrentEntity.position == fCurrentEntity.count) {
  		load(0, true);
  	    }
  	    else if (fCurrentEntity.position >= fCurrentEntity.count - delimLen) {
  		// fewer than delimLen + 1 characters remain: move them to the
  		// front of the buffer and load more input behind them
  		System.arraycopy(fCurrentEntity.ch, fCurrentEntity.position,
  				 fCurrentEntity.ch, 0, fCurrentEntity.count - fCurrentEntity.position);
  		load(fCurrentEntity.count - fCurrentEntity.position, false);
  		fCurrentEntity.position = 0;
  	    } 
  	    if (fCurrentEntity.position >= fCurrentEntity.count - delimLen) {
  		// something must be wrong with the input: e.g., file ends an
  		// unterminated comment
  		int length = fCurrentEntity.count - fCurrentEntity.position;
  		buffer.append (fCurrentEntity.ch, fCurrentEntity.position,
  			       length); 
  		fCurrentEntity.columnNumber += fCurrentEntity.count;
  		fCurrentEntity.position = fCurrentEntity.count;
  		load(0,true);
  		return false;
  	    }
      
  	    // normalize newlines
  	    int offset = fCurrentEntity.position;
  	    int c = fCurrentEntity.ch[offset];
  	    int newlines = 0;
  	    if (c == '\n' || (c == '\r' && external)) {
  		// consume the leading run of newline characters
  		do {
  		    c = fCurrentEntity.ch[fCurrentEntity.position++];
  		    if (c == '\r' && external) {
  			newlines++;
  			fCurrentEntity.lineNumber++;
  			fCurrentEntity.columnNumber = 1;
  			if (fCurrentEntity.position == fCurrentEntity.count) {
  			    offset = 0;
  			    fCurrentEntity.position = newlines;
  			    if (load(newlines, false)) {
  				break;
  			    }
  			}
  			// a line feed right after the carriage return is
  			// dropped from the appended range
  			if (fCurrentEntity.ch[fCurrentEntity.position] == '\n') {
  			    fCurrentEntity.position++;
  			    offset++;
  			}
  			/*** NEWLINE NORMALIZATION ***/
  			else {
  			    newlines++;
  			}
  		    }
  		    else if (c == '\n') {
  			newlines++;
  			fCurrentEntity.lineNumber++;
  			fCurrentEntity.columnNumber = 1;
  			if (fCurrentEntity.position == fCurrentEntity.count) {
  			    offset = 0;
  			    fCurrentEntity.position = newlines;
  			    fCurrentEntity.count = newlines;
  			    if (load(newlines, false)) {
  				break;
  			    }
  			}
  		    }
  		    else {
  			// not a newline: push it back and stop
  			fCurrentEntity.position--;
  			break;
  		    }
  		} while (fCurrentEntity.position < fCurrentEntity.count - 1);
  		// rewrite the consumed range as line feeds
  		for (int i = offset; i < fCurrentEntity.position; i++) {
  		    fCurrentEntity.ch[i] = '\n';
  		}
  		int length = fCurrentEntity.position - offset;
  		// the run reached the last buffered character: append what
  		// was gathered and let the caller call again
  		if (fCurrentEntity.position == fCurrentEntity.count - 1) {
  		    buffer.append(fCurrentEntity.ch, offset, length);
  		    return true;
  		}
  	    }
      
  	    // iterate over buffer looking for delimiter
  	OUTER: while (fCurrentEntity.position < fCurrentEntity.count) {
  	    c = fCurrentEntity.ch[fCurrentEntity.position++];
  	    if (c == charAt0) {
  		// looks like we just hit the delimiter
  		int delimOffset = fCurrentEntity.position - 1;
  		for (int i = 1; i < delimLen; i++) {
  		    // buffer ran out inside a possible delimiter: rewind to
  		    // its first character and leave the scan loop
  		    if (fCurrentEntity.position == fCurrentEntity.count) {
  			fCurrentEntity.position -= i;
  			break OUTER;
  		    }
  		    c = fCurrentEntity.ch[fCurrentEntity.position++];
  		    if (delimiter.charAt(i) != c) {
  			fCurrentEntity.position--;
  			break;
  		    }
  		}
  		// the whole delimiter matched
  		if (fCurrentEntity.position == delimOffset + delimLen) {
  		    done = true;
  		    break;
  		}
  	    }
  	    else if (c == '\n' || (external && c == '\r')) {
  		// leave the newline for the normalization step of the next
  		// pass of the outer do-loop
  		fCurrentEntity.position--;
  		break;
  	    }
  	    else if (XMLChar.isInvalid(c)) {
  		// stop in front of the invalid character and return what
  		// has been scanned so far
  		fCurrentEntity.position--;
  		int length = fCurrentEntity.position - offset;
  		fCurrentEntity.columnNumber += length - newlines;
  		buffer.append(fCurrentEntity.ch, offset, length); 
  		return true;
  	    }
  	}
  	    int length = fCurrentEntity.position - offset;
  	    fCurrentEntity.columnNumber += length - newlines;
  	    if (done) {
  		// do not append the delimiter itself
  		length -= delimLen;
  	    }
  	    buffer.append (fCurrentEntity.ch, offset, length);
      
  	    // return true if string was skipped
  	} while (!done);
  	return !done;
  
      }
  
      // Adapted from:
      // org.apache.xerces.impl.XMLEntityManager.EntityScanner.skipChar
      /**
       * Skips a character appearing immediately on the input.
       * <p>
       * <strong>Note:</strong> The character is consumed only if it matches
       * the specified character. When asked to skip a line feed on an
       * external entity, a carriage return (plus a line feed directly after
       * it) is accepted as a match as well.
       *
       * @param c The character to skip.
       *
       * @return Returns true if the character was skipped.
       *
       * @throws IOException  Thrown if i/o error occurs.
       * @throws EOFException Thrown on end of file.
       */
      public boolean skipChar(int c) throws IOException {
  
  	// load more characters, if needed
  	if (fCurrentEntity.position == fCurrentEntity.count) {
  	    load(0, true);
  	}
  
  	// skip character
  	int cc = fCurrentEntity.ch[fCurrentEntity.position];
  	if (cc == c) {
  	    fCurrentEntity.position++;
  	    if (c == '\n') {
  		fCurrentEntity.lineNumber++;
  		fCurrentEntity.columnNumber = 1;
  	    }
  	    else {
  		fCurrentEntity.columnNumber++;
  	    }
  	    return true;
  	} else if (c == '\n' && cc == '\r' && fCurrentEntity.isExternal()) {
  	    // handle newlines
  	    // NOTE(review): position has not moved since the check at the top
  	    // of the method, so this reload only triggers if that load() left
  	    // position == count - confirm whether it is reachable.
  	    if (fCurrentEntity.position == fCurrentEntity.count) {
  		fCurrentEntity.ch[0] = (char)cc;
  		load(1, false);
  	    }
  	    // consume the carriage return and a line feed directly after it
  	    fCurrentEntity.position++;
  	    if (fCurrentEntity.ch[fCurrentEntity.position] == '\n') {
  		fCurrentEntity.position++;
  	    }
  	    fCurrentEntity.lineNumber++;
  	    fCurrentEntity.columnNumber = 1;
  	    return true;
  	}
  
  	// character was not skipped
  	return false;
  
      }
  
      // Adapted from:
      // org.apache.xerces.impl.XMLEntityManager.EntityScanner.skipSpaces
      /**
       * Skips space characters appearing immediately on the input.
       * <p>
       * <strong>Note:</strong> The characters are consumed only if they are
       * space characters. Line and column counters are updated for every
       * newline that is skipped.
       *
       * @return Returns true if at least one space character was skipped.
       *
       * @throws IOException  Thrown if i/o error occurs.
       * @throws EOFException Thrown on end of file.
       *
       * @see XMLChar#isSpace
       */
      public boolean skipSpaces() throws IOException {
  
  	// load more characters, if needed
  	if (fCurrentEntity.position == fCurrentEntity.count) {
  	    load(0, true);
  	}
  
  	// skip spaces
  	int c = fCurrentEntity.ch[fCurrentEntity.position];
  	if (XMLChar.isSpace(c)) {
  	    boolean external = fCurrentEntity.isExternal();
  	    do {
  		boolean entityChanged = false;
  		// handle newlines
  		if (c == '\n' || (external && c == '\r')) {
  		    fCurrentEntity.lineNumber++;
  		    fCurrentEntity.columnNumber = 1;
  		    // the newline is the last buffered character: keep it in
  		    // slot 0 and load more input behind it
  		    if (fCurrentEntity.position == fCurrentEntity.count - 1) {
  			fCurrentEntity.ch[0] = (char)c;
  			entityChanged = load(1, true);
  			if (!entityChanged)
                                  // the load change the position to be 1,
                                  // need to restore it when entity not changed
  			    fCurrentEntity.position = 0;
  		    }
  		    if (c == '\r' && external) {
  			// REVISIT: Does this need to be updated to fix the
  			//          #x0D ^#x0A newline normalization problem? -Ac
  			// step onto a line feed that directly follows the
  			// carriage return; otherwise stay where we were
  			if (fCurrentEntity.ch[++fCurrentEntity.position] != '\n') {
  			    fCurrentEntity.position--;
  			}
  		    }
  		    /*** NEWLINE NORMALIZATION ***
  			 else {
  			 if (fCurrentEntity.ch[fCurrentEntity.position + 1] == '\r'
  			 && external) {
  			 fCurrentEntity.position++;
  			 }
  			 }
  			 /***/
  		}
  		else {
  		    fCurrentEntity.columnNumber++;
  		}
  		// load more characters, if needed
  		// (the current character is only stepped over when load()
  		// did not report an entity change)
  		if (!entityChanged)
  		    fCurrentEntity.position++;
  		if (fCurrentEntity.position == fCurrentEntity.count) {
  		    load(0, true);
  		}
  	    } while (XMLChar.isSpace(c = fCurrentEntity.ch[fCurrentEntity.position]));
  	    return true;
  	}
  
  	// no spaces were found
  	return false;
  
      }
  
      /**
       * Skips the specified string appearing immediately on the input.
       * <p>
       * <strong>Note:</strong> The characters are consumed only if the
       * whole string matches; on a mismatch the read position is moved
       * back by the number of characters examined.
       *
       * @param s The string to skip.
       *
       * @return Returns true if the string was skipped.
       *
       * @throws IOException  Thrown if i/o error occurs.
       * @throws EOFException Thrown on end of file.
       */
      public boolean skipString(String s) throws IOException {
  
  	// load more characters, if needed
  	if (fCurrentEntity.position == fCurrentEntity.count) {
  	    load(0, true);
  	}
  
  	// skip string
  	final int length = s.length();
  	for (int i = 0; i < length; i++) {
  	    char c = fCurrentEntity.ch[fCurrentEntity.position++];
  	    if (c != s.charAt(i)) {
  		// mismatch: un-read the i + 1 characters examined so far
  		fCurrentEntity.position -= i + 1;
  		return false;
  	    }
  	    // buffer exhausted before the whole string was compared: move the
  	    // matched prefix to the front and load more input behind it
  	    if (i < length - 1 && fCurrentEntity.position == fCurrentEntity.count) {
  		System.arraycopy(fCurrentEntity.ch, fCurrentEntity.count - i - 1, fCurrentEntity.ch, 0, i + 1);
  		// REVISIT: Can a string to be skipped cross an
  		//          entity boundary? -Ac
  		if (load(i + 1, false)) {
  		    fCurrentEntity.position -= i + 1;
  		    return false;
  		}
  	    }
  	}
  	fCurrentEntity.columnNumber += length;
  	return true;
  
      }
  
      // Adapted from:
      // org.apache.xerces.impl.XMLEntityManager.EntityScanner.load
      /**
       * Refills the character buffer of the current entity from its reader.
       * <p>
       * When the entity allows chunked reads, the rest of the buffer after
       * <code>offset</code> is requested; otherwise
       * <code>DEFAULT_XMLDECL_BUFFER_SIZE</code> characters are requested.
       *
       * @param offset       Index in the character buffer at which the
       *                     newly read characters are stored.
       * @param changeEntity True to end the current entity when its reader
       *                     is exhausted; false to leave the entity in
       *                     place and only report the boundary through
       *                     the return value.
       *
       * @return true if the reader reported end of input during this call,
       *         false otherwise.
       *
       * @throws IOException  Thrown if i/o error occurs.
       * @throws EOFException Thrown if <code>changeEntity</code> is true and
       *                      no current entity is left after ending the
       *                      exhausted one.
       */
      final boolean load(int offset, boolean changeEntity)
          throws IOException {

          // decide how many characters to ask the reader for
          final int wanted;
          if (fCurrentEntity.mayReadChunks) {
              wanted = fCurrentEntity.ch.length - offset;
          } else {
              wanted = DEFAULT_XMLDECL_BUFFER_SIZE;
          }
          final int numRead =
              fCurrentEntity.reader.read(fCurrentEntity.ch, offset, wanted);

          if (numRead != -1) {
              // a zero-length read leaves count and position untouched
              if (numRead != 0) {
                  fCurrentEntity.count = numRead + offset;
                  fCurrentEntity.position = offset;
              }
              return false;
          }

          // the reader is exhausted: nothing is buffered beyond offset
          fCurrentEntity.count = offset;
          fCurrentEntity.position = offset;
          if (changeEntity) {
              endEntity();
              if (fCurrentEntity == null) {
                  throw new EOFException();
              }
              // handle the trailing edges
              if (fCurrentEntity.position == fCurrentEntity.count) {
                  load(0, false);
              }
          }
          return true;

      }
  
      // Adapted from:
      // org.apache.xerces.impl.XMLEntityManager.RewindableInputStream
      /**
       * This class wraps the byte inputstreams we're presented with.
       * We need it because java.io.InputStreams don't provide
       * functionality to reread processed bytes, and they have a habit
       * of reading more than one character when you call their read()
       * methods.  This means that, once we discover the true (declared)
       * encoding of a document, we can neither backtrack to read the
       * whole doc again nor start reading where we are with a new
       * reader.
       *
       * This class allows rewinding an inputStream by allowing a mark
       * to be set, and the stream reset to that position.  <strong>The
       * class assumes that it needs to read one byte per invocation
       * when its read() method is invoked, and buffers every byte read
       * that way.  Once the current entity allows chunked reads,
       * read(byte[], int, int) delegates to the underlying InputStream's
       * read(byte[], int, int) method--it won't buffer data read this
       * way!</strong>
       *
       * @author Neil Graham, IBM
       * @author Glenn Marcy, IBM
       */
      private final class RewindableInputStream extends InputStream {

          /** The wrapped stream; set to null by close(). */
          private InputStream fInputStream;
          /** Bytes read one at a time so far; doubled in size when full. */
          private byte[] fData;
          /** Offset that rewind() returns to. */
          private int fStartOffset;
          /** Offset at which end of stream was seen, or -1 if not seen yet. */
          private int fEndOffset;
          /** Current read offset into fData. */
          private int fOffset;
          /** Number of valid bytes in fData. */
          private int fLength;
          /** Offset remembered by mark(). */
          private int fMark;

          /**
           * Wraps the given stream.
           *
           * @param is The stream to read from.
           */
          public RewindableInputStream(InputStream is) {
              fData = new byte[DEFAULT_XMLDECL_BUFFER_SIZE];
              fInputStream = is;
              fStartOffset = 0;
              fEndOffset = -1;
              fOffset = 0;
              fLength = 0;
              fMark = 0;
          }

          /**
           * Sets the offset that rewind() goes back to.
           *
           * @param offset The new start offset.
           */
          public void setStartOffset(int offset) {
              fStartOffset = offset;
          }

          /** Moves the read offset back to the start offset. */
          public void rewind() {
              fOffset = fStartOffset;
          }

          /**
           * Reads a single byte, from the internal buffer if one is
           * available there, otherwise from the wrapped stream (in which
           * case the byte is also stored in the internal buffer).
           *
           * @return The byte read (0-255), or -1 at end of stream.
           *
           * @throws IOException Thrown if i/o error occurs.
           */
          public int read() throws IOException {
              // serve buffered bytes first
              if (fOffset < fLength) {
                  return fData[fOffset++] & 0xff;
              }
              if (fOffset == fEndOffset) {
                  return -1;
              }
              // grow the buffer when it is full
              if (fOffset == fData.length) {
                  byte[] newData = new byte[fOffset << 1];
                  System.arraycopy(fData, 0, newData, 0, fOffset);
                  fData = newData;
              }
              int b = fInputStream.read();
              if (b == -1) {
                  // remember where the stream ended
                  fEndOffset = fOffset;
                  return -1;
              }
              fData[fLength++] = (byte)b;
              fOffset++;
              return b & 0xff;
          }

          /**
           * Reads bytes into the given array.  Buffered bytes are served
           * first.  When the buffer is exhausted, the request is passed
           * straight to the wrapped stream if the current entity allows
           * chunked reads (those bytes are not buffered); otherwise a
           * single byte is read through read().
           *
           * @param b   The destination array.
           * @param off The offset in b at which to store the bytes.
           * @param len The maximum number of bytes to read.
           *
           * @return The number of bytes read, 0 if len is zero, or -1 at
           *         end of stream.
           *
           * @throws IOException Thrown if i/o error occurs.
           */
          public int read(byte[] b, int off, int len) throws IOException {
              // InputStream contract: a zero-length request reads nothing
              // and returns 0.  Without this guard the one-byte path below
              // would consume a byte and store it in b[off].
              if (len == 0) {
                  return 0;
              }
              int bytesLeft = fLength - fOffset;
              if (bytesLeft == 0) {
                  if (fOffset == fEndOffset) {
                      return -1;
                  }
                  // better get some more for the voracious reader...
                  if (fCurrentEntity.mayReadChunks) {
                      return fInputStream.read(b, off, len);
                  }
                  int returnedVal = read();
                  if (returnedVal == -1) {
                      fEndOffset = fOffset;
                      return -1;
                  }
                  b[off] = (byte)returnedVal;
                  return 1;
              }
              if (len < bytesLeft) {
                  if (len <= 0) {
                      return 0;
                  }
              }
              else {
                  // never hand out more than what is buffered
                  len = bytesLeft;
              }
              if (b != null) {
                  System.arraycopy(fData, fOffset, b, off, len);
              }
              fOffset += len;
              return len;
          }

          /**
           * Skips over bytes, buffered ones first and then bytes of the
           * wrapped stream.
           *
           * @param n The number of bytes to skip.
           *
           * @return The number of bytes actually skipped.
           *
           * @throws IOException Thrown if i/o error occurs.
           */
          public long skip(long n)
              throws IOException
          {
              int bytesLeft;
              if (n <= 0) {
                  return 0;
              }
              bytesLeft = fLength - fOffset;
              if (bytesLeft == 0) {
                  if (fOffset == fEndOffset) {
                      return 0;
                  }
                  return fInputStream.skip(n);
              }
              if (n <= bytesLeft) {
                  fOffset += n;
                  return n;
              }
              // skip everything that is buffered, then the remainder on
              // the wrapped stream
              fOffset += bytesLeft;
              if (fOffset == fEndOffset) {
                  return bytesLeft;
              }
              n -= bytesLeft;
              return fInputStream.skip(n) + bytesLeft;
          }

          /**
           * Returns the number of bytes that can be read without blocking.
           * <p>
           * In a manner of speaking, when this class isn't permitting more
           * than one byte at a time to be read, it is "blocking".  The
           * available() method should indicate how much can be read without
           * blocking, so while we're in this mode, it should only indicate
           * that bytes in its buffer are available; otherwise, the result of
           * available() on the underlying InputStream is appropriate.
           *
           * @return The number of buffered bytes not yet read; if there are
           *         none, 0 at end of stream or in one-byte mode, otherwise
           *         the wrapped stream's own estimate.
           *
           * @throws IOException Thrown if i/o error occurs.
           */
          public int available() throws IOException {
              int bytesLeft = fLength - fOffset;
              if (bytesLeft == 0) {
                  if (fOffset == fEndOffset) {
                      // InputStream.available() is specified to return 0
                      // at end of stream, never a negative value
                      return 0;
                  }
                  return fCurrentEntity.mayReadChunks ? fInputStream.available()
                      : 0;
              }
              return bytesLeft;
          }

          /**
           * Remembers the current offset for a later reset().
           *
           * @param howMuch Ignored.
           */
          public void mark(int howMuch) {
              fMark = fOffset;
          }

          /** Moves the read offset back to the offset remembered by mark(). */
          public void reset() {
              fOffset = fMark;
          }

          /**
           * @return Always true.
           */
          public boolean markSupported() {
              return true;
          }

          /**
           * Closes the wrapped stream, if it has not been closed already.
           *
           * @throws IOException Thrown if i/o error occurs.
           */
          public void close() throws IOException {
              if (fInputStream != null) {
                  fInputStream.close();
                  fInputStream = null;
              }
          }
      } // end of RewindableInputStream class
  
      // Adapted from:
      // org.apache.xerces.impl.XMLDocumentScannerImpl.dispatch
      /**
       * Scans what follows a leading "&lt;?xml", if the input starts with
       * it: either the XML declaration, or a processing instruction whose
       * target merely starts with "xml" (e.g. "xmlfoo").  Does nothing
       * if the input does not start with "&lt;?xml".
       *
       * @throws IOException     Thrown if i/o error occurs.
       * @throws JasperException Thrown by the scanning methods this
       *                         method delegates to.
       */
      private void scanXMLDecl() throws IOException, JasperException {

          if (!skipString("<?xml")) {
              return;
          }
          fMarkupDepth++;

          // standard XML declaration
          if (!XMLChar.isName(peekChar())) {
              scanXMLDeclOrTextDecl(false);
              return;
          }

          // NOTE: special case where document starts with a PI
          //       whose name starts with "xml" (e.g. "xmlfoo")
          fStringBuffer.clear();
          fStringBuffer.append("xml");
          while (XMLChar.isName(peekChar())) {
              fStringBuffer.append((char)scanChar());
          }
          final String piTarget = fSymbolTable.addSymbol(fStringBuffer.ch,
                                                         fStringBuffer.offset,
                                                         fStringBuffer.length);
          scanPIData(piTarget, fString);
      }
      
      // Adapted from:
      // org.apache.xerces.impl.XMLDocumentFragmentScannerImpl.scanXMLDeclOrTextDecl
      /**
       * Scans an XML or text declaration and, if it carries an encoding
       * pseudo-attribute, records that encoding.
       * <p>
       * <pre>
       * [23] XMLDecl ::= '&lt;?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
       * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
       * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' |  "'" EncName "'" )
       * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
       * [32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'")
       *                 | ('"' ('yes' | 'no') '"'))
       *
       * [77] TextDecl ::= '&lt;?xml' VersionInfo? EncodingDecl S? '?>'
       * </pre>
       *
       * @param scanningTextDecl True if a text declaration is to
       *                         be scanned instead of an XML
       *                         declaration.
       *
       * @throws IOException     Thrown if i/o error occurs.
       * @throws JasperException Thrown by the two-argument overload this
       *                         method delegates to.
       */
      private void scanXMLDeclOrTextDecl(boolean scanningTextDecl) 
          throws IOException, JasperException {

          // let the two-argument overload fill fStrings with the
          // version, encoding and standalone values
          scanXMLDeclOrTextDecl(scanningTextDecl, fStrings);
          fMarkupDepth--;

          // slot 1 holds the encoding pseudo-attribute
          final String declaredEncoding = fStrings[1];
          if (declaredEncoding == null) {
              return;
          }

          // remember the encoding named in the prolog
          isEncodingSetInProlog = true;
          encoding = declaredEncoding;
      }
  
      // Adapted from:
      // org.apache.xerces.impl.XMLScanner.scanXMLDeclOrTextDecl
      /**
       * Scans the pseudo-attributes of an XML or text declaration, up to
       * and including the closing "?>", and returns their values through
       * <code>pseudoAttributeValues</code>.
       * <p>
       * <pre>
       * [23] XMLDecl ::= '&lt;?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
       * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
       * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' |  "'" EncName "'" )
       * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
       * [32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'")
       *                 | ('"' ('yes' | 'no') '"'))
       *
       * [77] TextDecl ::= '&lt;?xml' VersionInfo? EncodingDecl S? '?>'
       * </pre>
       * <p>
       * <strong>Note:</strong> This method uses fString, anything in it
       * at the time of calling is lost.
       *
       * @param scanningTextDecl True if a text declaration is to
       *                         be scanned instead of an XML
       *                         declaration.
       * @param pseudoAttributeValues An array of size 3 to return the version,
       *                         encoding and standalone pseudo attribute values
       *                         (in that order).  A slot is set to null when
       *                         the corresponding pseudo-attribute was not
       *                         scanned.
       *
       * @throws IOException     Thrown if i/o error occurs.
       * @throws JasperException Declared for the errors reported through
       *                         err.jspError / reportFatalError.
       */
      private void scanXMLDeclOrTextDecl(boolean scanningTextDecl,
  				       String[] pseudoAttributeValues) 
                  throws IOException, JasperException {

          // pseudo-attribute values
          String version = null;
          String encoding = null;
          String standalone = null;

          // scan pseudo-attributes
          // the state names the pseudo-attribute expected next; the
          // attributes must appear in the order version, encoding, standalone
          final int STATE_VERSION = 0;
          final int STATE_ENCODING = 1;
          final int STATE_STANDALONE = 2;
          final int STATE_DONE = 3;
          int state = STATE_VERSION;

          boolean dataFoundForTarget = false;
          // sawSpace: whether white space preceded the pseudo-attribute
          // about to be scanned
          boolean sawSpace = skipSpaces();
          while (peekChar() != '?') {
              dataFoundForTarget = true;
              // the attribute value is left in fString
              String name = scanPseudoAttribute(scanningTextDecl, fString);
              // NOTE(review): the names are compared by identity below --
              // presumably scanName() returns symbols interned in the same
              // table as fVersionSymbol etc.; confirm
              switch (state) {
                  case STATE_VERSION: {
                      if (name == fVersionSymbol) {
                          if (!sawSpace) {
                              reportFatalError(scanningTextDecl
                                         ? "jsp.error.xml.spaceRequiredBeforeVersionInTextDecl"
                                         : "jsp.error.xml.spaceRequiredBeforeVersionInXMLDecl",
                                               null);
                          }
                          version = fString.toString();
                          state = STATE_ENCODING;
                          if (!version.equals("1.0")) {
                              // REVISIT: XML REC says we should throw an error
  			    // in such cases.
                              // some may object the throwing of fatalError.
                              err.jspError("jsp.error.xml.versionNotSupported",
  					 version);
                          }
                      } else if (name == fEncodingSymbol) {
                          // encoding came first: the version may only be
                          // omitted in a text declaration
                          if (!scanningTextDecl) {
                              err.jspError("jsp.error.xml.versionInfoRequired");
                          }
                          if (!sawSpace) {
                              reportFatalError(scanningTextDecl
                                        ? "jsp.error.xml.spaceRequiredBeforeEncodingInTextDecl"
                                        : "jsp.error.xml.spaceRequiredBeforeEncodingInXMLDecl",
                                               null);
                          }
                          encoding = fString.toString();
                          // a text declaration has nothing after the encoding
                          state = scanningTextDecl ? STATE_DONE : STATE_STANDALONE;
                      } else {
                          if (scanningTextDecl) {
                              err.jspError("jsp.error.xml.encodingDeclRequired");
                          }
                          else {
                              err.jspError("jsp.error.xml.versionInfoRequired");
                          }
                      }
                      break;
                  }
                  case STATE_ENCODING: {
                      if (name == fEncodingSymbol) {
                          if (!sawSpace) {
                              reportFatalError(scanningTextDecl
                                        ? "jsp.error.xml.spaceRequiredBeforeEncodingInTextDecl"
                                        : "jsp.error.xml.spaceRequiredBeforeEncodingInXMLDecl",
                                               null);
                          }
                          encoding = fString.toString();
                          state = scanningTextDecl ? STATE_DONE : STATE_STANDALONE;
                          // TODO: check encoding name; set encoding on
                          //       entity scanner
                      } else if (!scanningTextDecl && name == fStandaloneSymbol) {
                          // XML declaration without an encoding: standalone
                          // may directly follow the version
                          if (!sawSpace) {
                              err.jspError("jsp.error.xml.spaceRequiredBeforeStandalone");
                          }
                          standalone = fString.toString();
                          state = STATE_DONE;
                          if (!standalone.equals("yes") && !standalone.equals("no")) {
                              err.jspError("jsp.error.xml.sdDeclInvalid");
                          }
                      } else {
                          err.jspError("jsp.error.xml.encodingDeclRequired");
                      }
                      break;
                  }
                  case STATE_STANDALONE: {
                      if (name == fStandaloneSymbol) {
                          if (!sawSpace) {
                              err.jspError("jsp.error.xml.spaceRequiredBeforeStandalone");
                          }
                          standalone = fString.toString();
                          state = STATE_DONE;
                          if (!standalone.equals("yes") && !standalone.equals("no")) {
                              err.jspError("jsp.error.xml.sdDeclInvalid");
                          }
                      } else {
  			err.jspError("jsp.error.xml.encodingDeclRequired");
                      }
                      break;
                  }
                  default: {
                      // STATE_DONE: any further pseudo-attribute is an error
                      err.jspError("jsp.error.xml.noMorePseudoAttributes");
                  }
              }
              sawSpace = skipSpaces();
          }
          // REVISIT: should we remove this error reporting?
          if (scanningTextDecl && state != STATE_DONE) {
              err.jspError("jsp.error.xml.morePseudoAttributes");
          }
          
          // If there is no data in the xml or text decl then we fail to report
  	// error for version or encoding info above.
          if (scanningTextDecl) {
              if (!dataFoundForTarget && encoding == null) {
                  err.jspError("jsp.error.xml.encodingDeclRequired");
              }
          } else {
              if (!dataFoundForTarget && version == null) {
                  err.jspError("jsp.error.xml.versionInfoRequired");
              }
          }

          // end
          // the declaration must be closed by "?>"
          if (!skipChar('?')) {
              err.jspError("jsp.error.xml.xmlDeclUnterminated");
          }
          if (!skipChar('>')) {
              err.jspError("jsp.error.xml.xmlDeclUnterminated");

          }
          
          // fill in return array
          pseudoAttributeValues[0] = version;
          pseudoAttributeValues[1] = encoding;
          pseudoAttributeValues[2] = standalone;
      }
  
      // Adapted from:
      // org.apache.xerces.impl.XMLScanner.scanPseudoAttribute
      /**
       * Scans one pseudo-attribute of the form
       * <code>name S? '=' S? quote value quote</code>.
       * <p>
       * <strong>Note:</strong> This method uses fStringBuffer2, anything in it
       * at the time of calling is lost.
       *
       * @param scanningTextDecl True if scanning this pseudo-attribute for a
       *                         TextDecl; false if scanning XMLDecl. This
       *                         flag only selects which error message is
       *                         reported.
       * @param value            The string to fill in with the attribute
       *                         value.
       *
       * @return The name of the attribute
       *
       * @throws IOException     Thrown if i/o error occurs.
       * @throws JasperException Declared for the errors reported through
       *                         err.jspError / reportFatalError.
       */
      public String scanPseudoAttribute(boolean scanningTextDecl, 
                                        XMLString value) 
                  throws IOException, JasperException {

          // attribute name
          final String attrName = scanName();
          if (attrName == null) {
              err.jspError("jsp.error.xml.pseudoAttrNameExpected");
          }

          // S? '=' S?
          skipSpaces();
          if (!skipChar('=')) {
              final String eqKey = scanningTextDecl
                  ? "jsp.error.xml.eqRequiredInTextDecl"
                  : "jsp.error.xml.eqRequiredInXMLDecl";
              reportFatalError(eqKey, attrName);
          }
          skipSpaces();

          // opening quote
          final int quote = peekChar();
          final boolean quoted = (quote == '\'' || quote == '"');
          if (!quoted) {
              final String quoteKey = scanningTextDecl
                  ? "jsp.error.xml.quoteRequiredInTextDecl"
                  : "jsp.error.xml.quoteRequiredInXMLDecl";
              reportFatalError(quoteKey, attrName);
          }
          scanChar();

          // value: scanLiteral() may stop before the closing quote, in
          // which case the pieces are collected in fStringBuffer2
          int stop = scanLiteral(quote, value);
          if (stop != quote) {
              fStringBuffer2.clear();
              while (true) {
                  fStringBuffer2.append(value);
                  if (stop != -1) {
                      if (stop == '&' || stop == '%' || stop == '<' || stop == ']') {
                          fStringBuffer2.append((char)scanChar());
                      }
                      else if (XMLChar.isHighSurrogate(stop)) {
                          scanSurrogates(fStringBuffer2);
                      }
                      else if (XMLChar.isInvalid(stop)) {
                          final String invalidKey = scanningTextDecl
                              ? "jsp.error.xml.invalidCharInTextDecl"
                              : "jsp.error.xml.invalidCharInXMLDecl";
                          reportFatalError(invalidKey, Integer.toString(stop, 16));
                          scanChar();
                      }
                  }
                  stop = scanLiteral(quote, value);
                  if (stop == quote) {
                      break;
                  }
              }
              fStringBuffer2.append(value);
              value.setValues(fStringBuffer2);
          }

          // closing quote
          if (!skipChar(quote)) {
              final String closeKey = scanningTextDecl
                  ? "jsp.error.xml.closeQuoteMissingInTextDecl"
                  : "jsp.error.xml.closeQuoteMissingInXMLDecl";
              reportFatalError(closeKey, attrName);
          }

          return attrName;

      }
      
      // Adapted from:
      // org.apache.xerces.impl.XMLScanner.scanPIData
      /**
       * Scans the data of a processing instruction, up to the closing
       * "?>". This is needed to handle the situation where a document
       * starts with a processing instruction whose target name
       * <em>starts with</em> "xml". (e.g. xmlfoo)
       * <p>
       * <strong>Note:</strong> This method uses fStringBuffer, anything in it
       * at the time of calling is lost.
       *
       * @param target The PI target
       * @param data The string to fill in with the data
       *
       * @throws IOException     Thrown if i/o error occurs.
       * @throws JasperException Declared for the errors reported through
       *                         err.jspError.
       */
      private void scanPIData(String target, XMLString data) 
          throws IOException, JasperException {

          // the three-letter target "xml", in any letter case, is reserved
          final boolean reservedTarget = target.length() == 3
              && Character.toLowerCase(target.charAt(0)) == 'x'
              && Character.toLowerCase(target.charAt(1)) == 'm'
              && Character.toLowerCase(target.charAt(2)) == 'l';
          if (reservedTarget) {
              err.jspError("jsp.error.xml.reservedPITarget");
          }

          // without white space after the target, only "?>" may follow
          if (!skipSpaces()) {
              if (skipString("?>")) {
                  // we found the end, there is no data
                  data.clear();
                  return;
              }
              // if there is data there should be some space
              err.jspError("jsp.error.xml.spaceRequiredInPI");
          }

          // data: keep scanning for as long as scanData() asks for more
          fStringBuffer.clear();
          while (scanData("?>", fStringBuffer)) {
              final int next = peekChar();
              if (next == -1) {
                  continue;
              }
              if (XMLChar.isHighSurrogate(next)) {
                  scanSurrogates(fStringBuffer);
              } else if (XMLChar.isInvalid(next)) {
                  err.jspError("jsp.error.xml.invalidCharInPI",
                               Integer.toHexString(next));
                  scanChar();
              }
          }
          data.setValues(fStringBuffer);

      }
  
      // Adapted from:
      // org.apache.xerces.impl.XMLScanner.scanSurrogates
      /**
       * Scans a surrogate pair and appends both halves to the specified
       * buffer.
       * <p>
       * <strong>Note:</strong> This assumes the current char has already been
       * identified as a high surrogate.
       *
       * @param buf The StringBuffer to append the read surrogates to.
       *
       * @return True if it succeeded; false if the next char is not a low
       *         surrogate or the combined character is not a valid XML
       *         character (an error is reported in both cases).
       *
       * @throws IOException     Thrown if i/o error occurs.
       * @throws JasperException Declared for the errors reported through
       *                         err.jspError.
       */
      private boolean scanSurrogates(XMLStringBuffer buf)
          throws IOException, JasperException {

          // consume the high half, look at the low half without consuming it
          final int lead = scanChar();
          final int trail = peekChar();
          if (!XMLChar.isLowSurrogate(trail)) {
              err.jspError("jsp.error.xml.invalidCharInContent",
                           Integer.toString(lead, 16));
              return false;
          }
          scanChar();

          // convert surrogates to supplemental character
          final int supplemental = XMLChar.supplemental((char)lead, (char)trail);

          // supplemental character must be a valid XML character
          if (XMLChar.isValid(supplemental)) {
              // fill in the buffer
              buf.append((char)lead);
              buf.append((char)trail);
              return true;
          }

          err.jspError("jsp.error.xml.invalidCharInContent",
                       Integer.toString(supplemental, 16));
          return false;

      }
  
      // Adapted from:
      // org.apache.xerces.impl.XMLScanner.reportFatalError
      /**
       * Convenience function used in all XML scanners; forwards the
       * message to err.jspError.
       *
       * @param msgId The key of the error message.
       * @param arg   The single argument for the message; some callers
       *              in this class pass null.
       *
       * @throws JasperException Declared for the error reported through
       *                         err.jspError.
       */
      private void reportFatalError(String msgId, String arg)
                  throws JasperException {
          err.jspError(msgId, arg);
      }
  
  }
  
  
  
  
  

--
To unsubscribe, e-mail:   <ma...@jakarta.apache.org>
For additional commands, e-mail: <ma...@jakarta.apache.org>