You are viewing a plain text version of this content. The canonical link for it is here.

Posted to dev@tomcat.apache.org by lu...@apache.org on 2002/11/06 21:14:20 UTC

cvs commit: jakarta-tomcat-jasper/jasper2/src/share/org/apache/jasper/xmlparser ASCIIReader.java UCSReader.java UTF8Reader.java XMLEncodingDetector.java

luehe       2002/11/06 12:14:20

  Modified:    jasper2/src/share/org/apache/jasper/compiler
                        ErrorDispatcher.java JspReader.java JspUtil.java
                        PageDataImpl.java PageInfo.java
                        ParserController.java Validator.java
               jasper2/src/share/org/apache/jasper/resources
                        messages.properties messages_es.properties
                        messages_ja.properties
  Added:       jasper2/src/share/org/apache/jasper/xmlparser
                        ASCIIReader.java UCSReader.java UTF8Reader.java
                        XMLEncodingDetector.java
  Log:
  First cut at I18N changes.
  
  Revision  Changes    Path
  1.7       +22 -24    jakarta-tomcat-jasper/jasper2/src/share/org/apache/jasper/compiler/ErrorDispatcher.java
  
  Index: ErrorDispatcher.java
  ===================================================================
  RCS file: /home/cvs/jakarta-tomcat-jasper/jasper2/src/share/org/apache/jasper/compiler/ErrorDispatcher.java,v
  retrieving revision 1.6
  retrieving revision 1.7
  diff -u -r1.6 -r1.7
  --- ErrorDispatcher.java	4 Nov 2002 20:18:24 -0000	1.6
  +++ ErrorDispatcher.java	6 Nov 2002 20:14:19 -0000	1.7
  @@ -96,9 +96,6 @@
   	errHandler = new DefaultErrorHandler(this);
       }
   
  -    //*********************************************************************
  -    // Package-scoped utility methods
  -
       /*
        * Dispatches the given JSP parse error to the configured error handler.
        *
  @@ -108,7 +105,7 @@
        *
        * @param errCode Error code
        */
  -    void jspError(String errCode) throws JasperException {
  +    public void jspError(String errCode) throws JasperException {
   	dispatch(null, errCode, null, null);
       }
   
  @@ -122,7 +119,7 @@
        * @param where Error location
        * @param errCode Error code
        */
  -    void jspError(Mark where, String errCode) throws JasperException {
  +    public void jspError(Mark where, String errCode) throws JasperException {
   	dispatch(where, errCode, null, null);
       }
   
  @@ -136,7 +133,7 @@
        * @param n Node that caused the error
        * @param errCode Error code
        */
  -    void jspError(Node n, String errCode) throws JasperException {
  +    public void jspError(Node n, String errCode) throws JasperException {
   	dispatch(n.getStart(), errCode, null, null);
       }
   
  @@ -150,7 +147,7 @@
        * @param errCode Error code
        * @param arg Argument for parametric replacement
        */
  -    void jspError(String errCode, String arg) throws JasperException {
  +    public void jspError(String errCode, String arg) throws JasperException {
   	dispatch(null, errCode, new Object[] {arg}, null);
       }
   
  @@ -165,7 +162,7 @@
        * @param errCode Error code
        * @param arg Argument for parametric replacement
        */
  -    void jspError(Mark where, String errCode, String arg)
  +    public void jspError(Mark where, String errCode, String arg)
   	        throws JasperException {
   	dispatch(where, errCode, new Object[] {arg}, null);
       }
  @@ -181,7 +178,7 @@
        * @param errCode Error code
        * @param arg Argument for parametric replacement
        */
  -    void jspError(Node n, String errCode, String arg)
  +    public void jspError(Node n, String errCode, String arg)
   	        throws JasperException {
   	dispatch(n.getStart(), errCode, new Object[] {arg}, null);
       }
  @@ -197,7 +194,7 @@
        * @param arg1 First argument for parametric replacement
        * @param arg2 Second argument for parametric replacement
        */
  -    void jspError(String errCode, String arg1, String arg2)
  +    public void jspError(String errCode, String arg1, String arg2)
   	        throws JasperException {
   	dispatch(null, errCode, new Object[] {arg1, arg2}, null);
       }
  @@ -214,7 +211,7 @@
        * @param arg1 First argument for parametric replacement
        * @param arg2 Second argument for parametric replacement
        */
  -    void jspError(Mark where, String errCode, String arg1, String arg2)
  +    public void jspError(Mark where, String errCode, String arg1, String arg2)
   	        throws JasperException {
   	dispatch(where, errCode, new Object[] {arg1, arg2}, null);
       }
  @@ -231,7 +228,7 @@
        * @param arg1 First argument for parametric replacement
        * @param arg2 Second argument for parametric replacement
        */
  -    void jspError(Node n, String errCode, String arg1, String arg2)
  +    public void jspError(Node n, String errCode, String arg1, String arg2)
   	        throws JasperException {
   	dispatch(n.getStart(), errCode, new Object[] {arg1, arg2}, null);
       }
  @@ -241,7 +238,7 @@
        *
        * @param e Parsing exception
        */
  -    void jspError(Exception e) throws JasperException {
  +    public void jspError(Exception e) throws JasperException {
   	dispatch(null, null, null, e);
       }
   
  @@ -256,7 +253,7 @@
        * @param arg Argument for parametric replacement
        * @param e Parsing exception
        */
  -    void jspError(String errCode, String arg, Exception e)
  +    public void jspError(String errCode, String arg, Exception e)
   	        throws JasperException {
   	dispatch(null, errCode, new Object[] {arg}, e);
       }
  @@ -273,7 +270,7 @@
        * @param arg Argument for parametric replacement
        * @param e Parsing exception
        */
  -    void jspError(Node n, String errCode, String arg, Exception e)
  +    public void jspError(Node n, String errCode, String arg, Exception e)
   	        throws JasperException {
   	dispatch(n.getStart(), errCode, new Object[] {arg}, e);
       }
  @@ -287,7 +284,7 @@
        * @param page Node representation of JSP page from which the Java source
        * file was generated
        */
  -    void javacError(String errMsg, String fname, Node.Nodes page)
  +    public void javacError(String errMsg, String fname, Node.Nodes page)
   	        throws JasperException, IOException {
   	JavacErrorDetail[] errDetails = parseJavacMessage(errMsg, fname, page);
   	errHandler.javacError(errDetails);
  @@ -304,7 +301,7 @@
        * 
        * @return Localized error message
        */
  -    String getString(String errCode) {
  +    public String getString(String errCode) {
   	String errMsg = errCode;
   	try {
   	    errMsg = bundle.getString(errCode);
  @@ -325,7 +322,7 @@
        *
        * @return Localized error message
        */
  -    String getString(String errCode, String arg) {
  +    public String getString(String errCode, String arg) {
   	return getString(errCode, new Object[] {arg});
       }
   
  @@ -342,7 +339,7 @@
        *
        * @return Localized error message
        */
  -    String getString(String errCode, String arg1, String arg2) {
  +    public String getString(String errCode, String arg1, String arg2) {
   	return getString(errCode, new Object[] {arg1, arg2});
       }
       
  @@ -360,7 +357,8 @@
        *
        * @return Localized error message
        */
  -    String getString(String errCode, String arg1, String arg2, String arg3) {
  +    public String getString(String errCode, String arg1, String arg2,
  +			    String arg3) {
   	return getString(errCode, new Object[] {arg1, arg2, arg3});
       }
   
  @@ -376,7 +374,7 @@
        *
        * @return Localized error message
        */
  -    String getString(String errCode, Object[] args) {
  +    public String getString(String errCode, Object[] args) {
   	String errMsg = errCode;
   	try {
   	    errMsg = bundle.getString(errCode);
  
  
  
  1.11      +25 -7     jakarta-tomcat-jasper/jasper2/src/share/org/apache/jasper/compiler/JspReader.java
  
  Index: JspReader.java
  ===================================================================
  RCS file: /home/cvs/jakarta-tomcat-jasper/jasper2/src/share/org/apache/jasper/compiler/JspReader.java,v
  retrieving revision 1.10
  retrieving revision 1.11
  diff -u -r1.10 -r1.11
  --- JspReader.java	10 Oct 2002 00:49:21 -0000	1.10
  +++ JspReader.java	6 Nov 2002 20:14:19 -0000	1.11
  @@ -57,6 +57,7 @@
   
   import java.io.*;
   import java.util.*;
  +import java.util.jar.JarFile;
   import org.apache.jasper.Constants;
   import org.apache.jasper.JasperException;
   import org.apache.jasper.JspCompilationContext;
  @@ -94,29 +95,44 @@
       /*
        * Set to true when using the JspReader on a single file where we read up
        * to the end and reset to the beginning many times.
  -     * (as in ParserCtl.figureOutJspDocument().
  +     * (as in ParserController.figureOutJspDocument()).
        */
       private boolean singleFile;
   
       /*
        * Constructor.
        */
  -    public JspReader(JspCompilationContext ctx,
  -		     String file,
  +    public JspReader(JspCompilationContext ctxt,
  +		     String fname,
  +		     String encoding,
  +		     JarFile jarFile,
  +		     ErrorDispatcher err)
  +	    throws JasperException, FileNotFoundException, IOException {
  +
  +	this(ctxt, fname, encoding,
  +	     JspUtil.getReader(fname, encoding, jarFile, ctxt, err),
  +	     err);
  +    }
  +
  +    /*
  +     * Constructor.
  +     */
  +    public JspReader(JspCompilationContext ctxt,
  +		     String fname,
   		     String encoding,
   		     InputStreamReader reader,
   		     ErrorDispatcher err)
   	    throws JasperException, FileNotFoundException {
   
  -        this.context = ctx;
  +        this.context = ctxt;
   	this.err = err;
   	sourceFiles = new Vector();
   	currFileId = 0;
   	size = 0;
   	singleFile = false;
  -	loghelper = new Logger.Helper("JASPER_LOG", "JspReader");
   
  -	pushFile2(file, encoding, reader);
  +	loghelper = new Logger.Helper("JASPER_LOG", "JspReader");
  +	pushFile2(fname, encoding, reader);
       }
       
       String getFile(int fileid) {
  @@ -553,7 +569,9 @@
   	    err.jspError("jsp.error.file.cannot.read", "ze file");
   	} finally {
   	    if (reader != null) {
  -		try { reader.close(); } catch (Exception any) {}
  +		try {
  +		    reader.close();
  +		} catch (Exception any) {}
   	    }
   	}
       }
  
  
  
  1.21      +52 -8     jakarta-tomcat-jasper/jasper2/src/share/org/apache/jasper/compiler/JspUtil.java
  
  Index: JspUtil.java
  ===================================================================
  RCS file: /home/cvs/jakarta-tomcat-jasper/jasper2/src/share/org/apache/jasper/compiler/JspUtil.java,v
  retrieving revision 1.20
  retrieving revision 1.21
  diff -u -r1.20 -r1.21
  --- JspUtil.java	17 Oct 2002 20:43:06 -0000	1.20
  +++ JspUtil.java	6 Nov 2002 20:14:19 -0000	1.21
  @@ -62,18 +62,18 @@
   
   import java.net.URL;
   
  -import java.io.CharArrayWriter;
  -import java.io.IOException;
  -import java.io.InputStream;
  -import java.io.FileInputStream;
  +import java.io.*;
   import java.util.ArrayList;
   import java.util.Enumeration;
   import java.util.Hashtable;
   import java.util.NoSuchElementException;
   import java.util.Vector;
   import java.util.StringTokenizer;
  +import java.util.jar.JarFile;
  +import java.util.zip.ZipEntry;
   
   import org.apache.jasper.Constants;
  +import org.apache.jasper.JspCompilationContext;
   import org.apache.jasper.JasperException;
   import org.apache.jasper.logging.Logger;
   
  @@ -97,7 +97,7 @@
    * @author Shawn Bayern
    * @author Mark Roth
    */
  -class JspUtil {
  +public class JspUtil {
   
       // Delimiters for request-time expressions (JSP and XML syntax)
       private static final String OPEN_EXPR  = "<%=";
  @@ -716,6 +716,50 @@
           public Class[] getParameterTypes() {
               return this.parameterTypes;
           }    
  +    }
  +
  +    public static InputStream getInputStream(String fname, JarFile jarFile,
  +					     JspCompilationContext ctxt,
  +					     ErrorDispatcher err)
  +	        throws JasperException, IOException {
  +
  +        InputStream in = null;
  +
  +	if (jarFile != null) {
  +	    String jarEntryName = fname.substring(1, fname.length());
  +	    ZipEntry jarEntry = jarFile.getEntry(jarEntryName);
  +	    if (jarEntry == null) {
  +		err.jspError("jsp.error.file.not.found", fname);
  +	    }
  +	    in = jarFile.getInputStream(jarEntry);
  +	} else {
  +	    in = ctxt.getResourceAsStream(fname);
  +	}
  +
  +	if (in == null) {
  +	    err.jspError("jsp.error.file.not.found", fname);
  +	}
  +
  +	return in;
  +    }
  +
  +
  +    static InputStreamReader getReader(String fname, String encoding,
  +				       JarFile jarFile,
  +				       JspCompilationContext ctxt,
  +				       ErrorDispatcher err)
  +	        throws JasperException, IOException {
  +
  +        InputStreamReader reader = null;
  +	InputStream in = getInputStream(fname, jarFile, ctxt, err);
  +
  +	try {
  +            reader = new InputStreamReader(in, encoding);
  +	} catch (UnsupportedEncodingException ex) {
  +	    err.jspError("jsp.error.unsupported.encoding", encoding);
  +	}
  +
  +	return reader;
       }
   }
   
  
  
  
  1.15      +19 -6     jakarta-tomcat-jasper/jasper2/src/share/org/apache/jasper/compiler/PageDataImpl.java
  
  Index: PageDataImpl.java
  ===================================================================
  RCS file: /home/cvs/jakarta-tomcat-jasper/jasper2/src/share/org/apache/jasper/compiler/PageDataImpl.java,v
  retrieving revision 1.14
  retrieving revision 1.15
  diff -u -r1.14 -r1.15
  --- PageDataImpl.java	30 Oct 2002 18:20:21 -0000	1.14
  +++ PageDataImpl.java	6 Nov 2002 20:14:19 -0000	1.15
  @@ -120,7 +120,8 @@
        *
        * @param page the page nodes from which to generate the XML view
        */
  -    public PageDataImpl(Node.Nodes page) throws JasperException {
  +    public PageDataImpl(Node.Nodes page, PageInfo pageInfo)
  +	        throws JasperException {
   
   	// First pass
   	FirstPassVisitor firstPassVisitor
  @@ -130,7 +131,7 @@
   	// Second pass
   	buf = new StringBuffer();
   	SecondPassVisitor secondPassVisitor
  -	    = new SecondPassVisitor(page.getRoot(), buf);
  +	    = new SecondPassVisitor(page.getRoot(), buf, pageInfo);
   	page.visit(secondPassVisitor);
       }
   
  @@ -236,6 +237,7 @@
   
   	private Node.Root root;
   	private StringBuffer buf;
  +	private PageInfo pageInfo;
   
   	// current jsp:id attribute value
   	private int jspId;
  @@ -243,9 +245,11 @@
   	/*
   	 * Constructor
   	 */
  -	public SecondPassVisitor(Node.Root root, StringBuffer buf) {
  +	public SecondPassVisitor(Node.Root root, StringBuffer buf,
  +				 PageInfo pageInfo) {
   	    this.root = root;
   	    this.buf = buf;
  +	    this.pageInfo = pageInfo;
   	}
   
   	/*
  @@ -254,6 +258,7 @@
   	public void visit(Node.Root n) throws JasperException {
   	    if (n == this.root) {
   		// top-level page
  +		appendXmlProlog();
   		appendTag(JSP_ROOT, n.getAttributes(), n.getBody(), null);
   	    } else {
   		visitBody(n);
  @@ -269,6 +274,7 @@
   	public void visit(Node.JspRoot n) throws JasperException {
   	    if (n == this.root) {
   		// top-level jsp:root element
  +		appendXmlProlog();
   		appendTag(JSP_ROOT, n.getAttributes(), n.getBody(), null);
   	    } else {
   		visitBody(n);
  @@ -525,6 +531,13 @@
   		buf.append("  ").append(name).append("=\"");
   		buf.append(JspUtil.getExprInXml(value)).append("\"\n");
   	    }
  +	}
  +
  +	/*
  +	 * Appends XML prolog with encoding declaration.
  +	 */
  +	private void appendXmlProlog() {
  +	    buf.append("<?xml version=\"1.0\" encoding=\"UTF-8\" ?>");
   	}
       }
   }
  
  
  
  1.14      +38 -7     jakarta-tomcat-jasper/jasper2/src/share/org/apache/jasper/compiler/PageInfo.java
  
  Index: PageInfo.java
  ===================================================================
  RCS file: /home/cvs/jakarta-tomcat-jasper/jasper2/src/share/org/apache/jasper/compiler/PageInfo.java,v
  retrieving revision 1.13
  retrieving revision 1.14
  diff -u -r1.13 -r1.14
  --- PageInfo.java	1 Nov 2002 02:54:41 -0000	1.13
  +++ PageInfo.java	6 Nov 2002 20:14:19 -0000	1.14
  @@ -65,7 +65,7 @@
   import org.apache.jasper.Constants;
   
   /**
  - * A repository for various info about the page under compilation
  + * A repository for various info about the translation unit under compilation.
    *
    * @author Kin-man Chung
    */
  @@ -88,15 +88,30 @@
       private boolean isErrorPage = false;
       private String errorPage = null;
       private String pageEncoding = null;
  +
  +    /*
  +     * Auto-detected encoding, or encoding specified in XML prolog
  +     * (declaration).
  +     * Only meaningful for XML documents.
  +     */
  +    private String xmlEncoding = null;
  +
  +    // Indicates whether page has XML declaration with encoding attribute
  +    private boolean hasEncodingProlog = false;
  +
       private int maxTagNesting = 0;
       private boolean scriptless = false;
       private boolean scriptingInvalid = false;
       private boolean elIgnored = false;
       private boolean elIgnoredSpecified = false;
       private boolean isXml = false;
  -    private boolean isXmlSpecified = false;	// true is there is a is-xml
  -						// element in jsp-config
  -    private boolean hasTagFile = false;		// A custom tag is a tag file
  +
  +    // true if there is an is-xml element in jsp-config
  +    private boolean isXmlSpecified = false;	
  +
  +    // A custom tag is a tag file
  +    private boolean hasTagFile = false;
  +
       private boolean hasJspRoot = false;
       private Vector includePrelude;
       private Vector includeCoda;
  @@ -218,6 +233,22 @@
   
       public String getPageEncoding() {
   	return pageEncoding;
  +    }
  +
  +    public void setXmlEncoding(String xmlEncoding) {
  +	this.xmlEncoding = xmlEncoding;
  +    }
  +
  +    public String getXmlEncoding() {
  +	return xmlEncoding;
  +    }
  +    
  +    public void setHasEncodingProlog(boolean hasEncodingProlog) {
  +	this.hasEncodingProlog = hasEncodingProlog;
  +    }
  +
  +    public boolean hasEncodingProlog() {
  +	return hasEncodingProlog;
       }
   
       public int getMaxTagNesting() {
  
  
  
  1.24      +112 -113  jakarta-tomcat-jasper/jasper2/src/share/org/apache/jasper/compiler/ParserController.java
  
  Index: ParserController.java
  ===================================================================
  RCS file: /home/cvs/jakarta-tomcat-jasper/jasper2/src/share/org/apache/jasper/compiler/ParserController.java,v
  retrieving revision 1.23
  retrieving revision 1.24
  diff -u -r1.23 -r1.24
  --- ParserController.java	28 Oct 2002 23:21:08 -0000	1.23
  +++ ParserController.java	6 Nov 2002 20:14:19 -0000	1.24
  @@ -63,6 +63,7 @@
   import org.xml.sax.Attributes;
   import org.apache.jasper.*;
   import org.apache.jasper.logging.Logger;
  +import org.apache.jasper.xmlparser.XMLEncodingDetector;
   
   /**
    * Controller for the parsing of a JSP page.
  @@ -75,30 +76,34 @@
    * the proper parser.
    *
    * @author Pierre Delisle
  + * @author Jan Luehe
    */
   class ParserController {
   
  +    private static final String CHARSET = "charset=";
  +
       private JspCompilationContext ctxt;
       private Compiler compiler;
  +    private PageInfo pageInfo;
       private ErrorDispatcher err;
   
       /*
  -     * A stack to keep track of the 'current base directory'
  -     * for include directives that refer to relative paths.
  -     */
  -    private Stack baseDirStack = new Stack();
  -
  -    /*
        * Document information which tells us what
        * kind of document we are dealing with.
        */
       private boolean isXml;
  +
  +    /*
  +     * A stack to keep track of the 'current base directory'
  +     * for include directives that refer to relative paths.
  +     */
  +    private Stack baseDirStack = new Stack();
       
       /*
        * Static information used in the process of figuring out
        * the kind of document we're dealing with.
        */
  -    private static final String JSP_ROOT_TAG   = "<jsp:root";
  +    private static final String JSP_ROOT_TAG = "<jsp:root";
   
       /*
        * Tells if the file being processed is the "top" file
  @@ -107,24 +112,12 @@
       private boolean isTopFile = true;
   
       /*
  -     * The encoding of the "top" file. This encoding is used
  -     * for included files by default.
  -     * Defaults to "ISO-8859-1" per JSP spec.
  -     */
  -    private String topFileEncoding = "ISO-8859-1"; 
  -    
  -    /*
  -     * The 'new' encoding required to read a page.
  -     */
  -    private String newEncoding;
  -
  -
  -    /*
        * Constructor
        */
       public ParserController(JspCompilationContext ctxt, Compiler compiler) {
           this.ctxt = ctxt; // @@@ can we assert that ctxt is not null?
   	this.compiler = compiler;
  +	this.pageInfo = compiler.getPageInfo();
   	this.err = compiler.getErrorDispatcher();
       }
   
  @@ -187,34 +180,29 @@
   	        throws FileNotFoundException, JasperException, IOException {
   
   	Node.Nodes parsedPage = null;
  -	String encoding = topFileEncoding;
           InputStreamReader reader = null;
   	String absFileName = resolveFileName(inFileName);
   
   	JarFile jarFile = (JarFile) ctxt.getTagFileJars().get(inFileName);
   
           try {
  -            // Figure out what type of JSP document we are dealing with
  -            reader = getReader(absFileName, encoding, jarFile);
  -            figureOutJspDocument(absFileName, encoding, reader);
  -	    if (newEncoding != null)
  -		encoding = newEncoding;
  +            // Figure out what type of JSP document and encoding type we are
  +	    // dealing with
  +            String encoding = figureOutJspDocument(absFileName, jarFile);
  +
   	    if (isTopFile) {
  -		// Set the "top level" file encoding that will be used
  -		// for all included files where encoding is not defined.
  -		topFileEncoding = encoding;
  +		pageInfo.setIsXml(isXml);
  +		if (isXml) {
  +		    pageInfo.setXmlEncoding(encoding);
  +		}
   		isTopFile = false;
   	    } else {
  -                compiler.getPageInfo().addDependant(absFileName);
  -            }
  -	    try {
  -		reader.close();
  -	    } catch (IOException ex) {
  +		compiler.getPageInfo().addDependant(absFileName);
   	    }
   
               // dispatch to the proper parser
  -	    
  -            reader = getReader(absFileName, encoding, jarFile);
  +            reader = JspUtil.getReader(absFileName, encoding, jarFile, ctxt,
  +				       err);
               if (isXml) {
                   parsedPage = JspDocumentParser.parse(this, absFileName,
   						     reader, parent,
  @@ -240,40 +228,68 @@
       }
   
       /**
  -     * Discover the properties of the page by scanning it.
  -     * Properties to find out are:
  -     *   - Is it in XML syntax?
  -     *   - What is the the page encoding
  +     * Determines the properties of the given page or tag file.
  +     * The properties to be determined are:
  +     *
  +     *   - Syntax (JSP or XML).
  +     *     This information is supplied by setting the instance variable
  +     *     'isXml'.
  +     *
  +     *   - Source Encoding.
  +     *     This information is supplied as the return value.
  +     *
        * If these properties are already specified in the jsp-config element
        * in web.xml, then they are used.
  +     *
  +     * @return The source encoding 
        */
  -    private void figureOutJspDocument(String file, 
  -				      String encoding,
  -				      InputStreamReader reader)
  -	 throws JasperException
  -    {
  -	newEncoding = null;
  -	PageInfo pageInfo = compiler.getPageInfo();
  +    private String figureOutJspDocument(String fname, JarFile jarFile)
  +	        throws JasperException, IOException {
  +
   	boolean isXmlFound = false;
  +	isXml = false;
  +
   	if (pageInfo.isXmlSpecified()) {
   	    // If <is-xml> is specified in a <jsp-property-group>, it is used.
   	    isXml = pageInfo.isXml();
   	    isXmlFound = true;
  -	} else if (file.endsWith(".jspx")) {
  +	} else if (fname.endsWith(".jspx")) {
   	    isXml = true;
   	    isXmlFound = true;
   	}
   	
  -	if (pageInfo.getPageEncoding() != null) {
  -	    newEncoding = pageInfo.getPageEncoding();
  +	String sourceEnc = null;
  +	if (isXmlFound && !isXml) {
  +	    // JSP syntax
  +	    if (pageInfo.getPageEncoding() != null) {
  +		// encoding specified in jsp-config (used only by JSP syntax)
  +		return pageInfo.getPageEncoding();
  +	    } else {
  +		// We don't know the encoding
  +		sourceEnc = "ISO-8859-1";
  +	    }
  +	} else {
  +	    // XML syntax or unknown, autodetect encoding ...
  +	    Object[] ret = XMLEncodingDetector.getEncoding(fname, jarFile,
  +							   ctxt, err);
  +	    sourceEnc = (String) ret[0];
  +	    boolean isEncodingSetInProlog = ((Boolean) ret[1]).booleanValue();
  +	    if (isTopFile) {
  +		pageInfo.setHasEncodingProlog(isEncodingSetInProlog);
  +	    }
  +	    if (isEncodingSetInProlog) {
  +		// Prolog present only in XML syntax
  +		isXml = true;
  +	    }
   	}
   
  -	if (isXmlFound && newEncoding != null)
  -	    return;	// No need to scan the file
  +	if (isXml) {
  +	    return sourceEnc;
  +	}
   
  -	JspReader jspReader;
  +	JspReader jspReader = null;
   	try {
  -	    jspReader = new JspReader(ctxt, file, encoding, reader, err);
  +	    jspReader = new JspReader(ctxt, fname, sourceEnc, jarFile, err);
   	} catch (FileNotFoundException ex) {
   	    throw new JasperException(ex);
   	}
  @@ -288,47 +304,60 @@
   	    Mark mark = jspReader.skipUntil(JSP_ROOT_TAG);
   	    if (mark != null) {
   	        isXml = true;
  +		return sourceEnc;
   	    } else {
   	        isXml = false;
   	    }
   	}
   
  -	if (newEncoding != null) {
  -	    // encoding specified in jsp-config
  -	    return;
  +	// At this point we know it's JSP syntax ...
  +	if (pageInfo.getPageEncoding() != null) {
  +	    return pageInfo.getPageEncoding();
  +	} else {
  +	    return getSourceEncodingForJspSyntax(jspReader, startMark);
   	}
  -
  -	// Figure out the encoding of the page
  -	// FIXME: We assume xml parser will take care of
  -        // encoding for page in XML syntax. Correct?
  -	if (!isXml) {
  -	    jspReader.reset(startMark);
  -	    while (jspReader.skipUntil("<%@") != null) {
  +    }
  +    
  +    /*
  +     * Determines page source encoding for JSP page or tag file in JSP syntax
  +     */
  +    private String getSourceEncodingForJspSyntax(JspReader jspReader,
  +						 Mark startMark)
  +	        throws JasperException {
  +
  +	String encoding = null;
  +
  +	jspReader.reset(startMark);
  +	while (jspReader.skipUntil("<%@") != null) {
  +	    jspReader.skipSpaces();
  +	    // compare for "tag ", so we don't match "taglib"
  +	    if (jspReader.matches("tag ") || jspReader.matches("page")) {
   		jspReader.skipSpaces();
  -		if (jspReader.matches( "tag " ) || jspReader.matches("page")) {
  -		    jspReader.skipSpaces();
  -		    Attributes attrs = Parser.parseAttributes(this, jspReader);
  -		    String attribute = "pageEncoding";
  -		    newEncoding = attrs.getValue("pageEncoding");
  -		    if (newEncoding == null) {
  -			String contentType = attrs.getValue("contentType");
  -			if (contentType != null) {
  -			    int loc = contentType.indexOf("charset=");
  -			    if (loc != -1) {
  -				newEncoding = contentType.substring(loc+8);
  -				return;
  -			    }
  -			}
  -			if (newEncoding == null)
  -			    newEncoding = "ISO-8859-1";
  -		    } else {
  -			return;
  +		Attributes attrs = Parser.parseAttributes(this, jspReader);
  +		encoding = attrs.getValue("pageEncoding");
  +		if (encoding != null) {
  +		    break;
  +		}
  +		String contentType = attrs.getValue("contentType");
  +		if (contentType != null) {
  +		    int loc = contentType.indexOf(CHARSET);
  +		    if (loc != -1) {
  +			encoding = contentType.substring(loc
  +							 + CHARSET.length());
  +			break;
   		    }
   		}
   	    }
   	}
  +
  +	if (encoding == null) {
  +	    // Default to "ISO-8859-1" per JSP spec
  +	    encoding = "ISO-8859-1";
  +	}
  +
  +	return encoding;
       }
  -    
  +
       /*
        * Resolve the name of the file and update
        * baseDirStack() to keep track of the current
  @@ -348,34 +377,4 @@
   	return fileName;
       }
   
  -    private InputStreamReader getReader(String file, String encoding,
  -					JarFile jarFile)
  -	        throws JasperException, IOException {
  -
  -        InputStream in = null;
  -        InputStreamReader reader = null;
  -
  -	if (jarFile != null) {
  -	    String jarEntryName = file.substring(1, file.length());
  -	    ZipEntry jarEntry = jarFile.getEntry(jarEntryName);
  -	    if (jarEntry == null) {
  -		err.jspError("jsp.error.file.not.found", file);
  -	    }
  -	    in = jarFile.getInputStream(jarEntry);
  -	} else {
  -	    in = ctxt.getResourceAsStream(file);
  -	}
  -
  -	if (in == null) {
  -	    err.jspError("jsp.error.file.not.found", file);
  -	}
  -
  -	try {
  -            reader = new InputStreamReader(in, encoding);
  -	} catch (UnsupportedEncodingException ex) {
  -	    err.jspError("jsp.error.unsupported.encoding", encoding);
  -	}
  -
  -	return reader;
  -    }
   }
  
  
  
  1.50      +20 -10    jakarta-tomcat-jasper/jasper2/src/share/org/apache/jasper/compiler/Validator.java
  
  Index: Validator.java
  ===================================================================
  RCS file: /home/cvs/jakarta-tomcat-jasper/jasper2/src/share/org/apache/jasper/compiler/Validator.java,v
  retrieving revision 1.49
  retrieving revision 1.50
  diff -u -r1.49 -r1.50
  --- Validator.java	30 Oct 2002 17:41:22 -0000	1.49
  +++ Validator.java	6 Nov 2002 20:14:19 -0000	1.50
  @@ -1020,8 +1020,7 @@
   	 */
   	page.visit(new DirectiveVisitor(compiler));
   
  -	// Determine the default output content type, per errata_a
  -	// http://jcp.org/aboutJava/communityprocess/maintenance/jsr053/errata_1_2_a_20020321.html
  +	// Determine the default output content type
   	PageInfo pageInfo = compiler.getPageInfo();
   	String contentType = pageInfo.getContentType();
   	if (!compiler.getCompilationContext().isTagFile() && 
  @@ -1033,10 +1032,20 @@
   	    } else {
   		defaultType = contentType;
   	    }
  -	    String charset = pageInfo.getPageEncoding();
  -	    if (charset == null)
  -		charset = isXml? "UTF-8": "ISO-8859-1";
  -	    pageInfo.setContentType(defaultType + ";charset=" + charset);
  +
  +	    String charset = null;
  +	    if (isXml) {
  +		charset = "UTF-8";
  +	    } else {
  +		charset = pageInfo.getPageEncoding();
  +		// The resulting charset might be null
  +	    }
  +
  +	    if (charset != null) {
  +		pageInfo.setContentType(defaultType + ";charset=" + charset);
  +	    } else {
  +		pageInfo.setContentType(defaultType);
  +	    }
   	}
   
   	/*
  @@ -1051,7 +1060,8 @@
   	 * Invoke TagLibraryValidator classes of all imported tags
   	 * (second validation step for custom tags according to JSP.10.5).
   	 */
  -	validateXmlView(new PageDataImpl(page), compiler);
  +	validateXmlView(new PageDataImpl(page, compiler.getPageInfo()),
  +			compiler);
   
   	/*
   	 * Invoke TagExtraInfo method isValid() for all imported tags 
  
  
  
  1.55      +34 -1     jakarta-tomcat-jasper/jasper2/src/share/org/apache/jasper/resources/messages.properties
  
  Index: messages.properties
  ===================================================================
  RCS file: /home/cvs/jakarta-tomcat-jasper/jasper2/src/share/org/apache/jasper/resources/messages.properties,v
  retrieving revision 1.54
  retrieving revision 1.55
  diff -u -r1.54 -r1.55
  --- messages.properties	6 Nov 2002 11:38:35 -0000	1.54
  +++ messages.properties	6 Nov 2002 20:14:20 -0000	1.55
  @@ -305,3 +305,36 @@
   jasper.error.emptybodycontent.nonempty=According to TLD, tag {0} must be empty, but is not
   jsp.error.tagfile.var_name_given_equals_attr_name=In tag file {0}, the name-given attribute ({1}) of a variable directive equals the name attribute of an attribute directive
   jsp.error.useBean.noSession=Illegal for useBean to use session scope when JSP page declares (via page directive) that it does not participate in sessions
  +jsp.error.xml.encodingByteOrderUnsupported = Given byte order for encoding \"{0}\" is not supported.
  +jsp.error.xml.encodingDeclInvalid = Invalid encoding name \"{0}\".
  +jsp.error.xml.encodingDeclRequired = The encoding declaration is required in the text declaration.
  +jsp.error.xml.morePseudoAttributes = More pseudo attributes are expected.
  +jsp.error.xml.noMorePseudoAttributes = No more pseudo attributes are allowed.
  +jsp.error.xml.versionInfoRequired = The version is required in the XML declaration.
  +jsp.error.xml.xmlDeclUnterminated = The XML declaration must end with \"?>\".
  +jsp.error.xml.reservedPITarget = The processing instruction target matching \"[xX][mM][lL]\" is not allowed.
  +jsp.error.xml.spaceRequiredInPI = White space is required between the processing instruction target and data.
  +jsp.error.xml.invalidCharInContent = An invalid XML character (Unicode: 0x{0}) was found in the element content of the document.
  +jsp.error.xml.spaceRequiredBeforeStandalone = White space is required before the standalone pseudo attribute in the XML declaration.
  +jsp.error.xml.sdDeclInvalid = The standalone document declaration value must be \"yes\" or \"no\", not \"{0}\".
  +jsp.error.xml.invalidCharInPI = An invalid XML character (Unicode: 0x{0}) was found in the processing instruction.
  +jsp.error.xml.versionNotSupported = XML version \"{0}\" is not supported, only XML 1.0 is supported.
  +jsp.error.xml.pseudoAttrNameExpected = a pseudo attribute name is expected.
  +jsp.error.xml.expectedByte = Expected byte {0} of {1}-byte UTF-8 sequence.
  +jsp.error.xml.invalidByte = Invalid byte {0} of {1}-byte UTF-8 sequence.
  +jsp.error.xml.operationNotSupported = Operation \"{0}\" not supported by {1} reader.
  +jsp.error.xml.invalidHighSurrogate = High surrogate bits in UTF-8 sequence must not exceed 0x10 but found 0x{0}.
  +jsp.error.xml.invalidASCII = Byte \"{0}\" not 7-bit ASCII.
  +jsp.error.xml.spaceRequiredBeforeEncodingInXMLDecl = White space is required before the encoding pseudo attribute in the XML declaration.
  +jsp.error.xml.spaceRequiredBeforeEncodingInTextDecl = White space is required before the encoding pseudo attribute in the text declaration.
  +jsp.error.xml.spaceRequiredBeforeVersionInTextDecl = White space is required before the version pseudo attribute in the text declaration.
  +jsp.error.xml.spaceRequiredBeforeVersionInXMLDecl = White space is required before the version pseudo attribute in the XML declaration.
  +jsp.error.xml.eqRequiredInXMLDecl = The '' = '' character must follow \"{0}\" in the XML declaration.
  +jsp.error.xml.eqRequiredInTextDecl = The '' = '' character must follow \"{0}\" in the text declaration.
  +jsp.error.xml.quoteRequiredInTextDecl = The value following \"{0}\" in the text declaration must be a quoted string.
  +jsp.error.xml.quoteRequiredInXMLDecl = The value following \"{0}\" in the XML declaration must be a quoted string.
  +jsp.error.xml.invalidCharInTextDecl = An invalid XML character (Unicode: 0x{0}) was found in the text declaration.
  +jsp.error.xml.invalidCharInXMLDecl = An invalid XML character (Unicode: 0x{0}) was found in the XML declaration.
  +jsp.error.xml.closeQuoteMissingInTextDecl = closing quote in the value following \"{0}\" in the text declaration is missing.
  +jsp.error.xml.closeQuoteMissingInXMLDecl = closing quote in the value following \"{0}\" in the XML declaration is missing.
  +jsp.error.xml.invalidHighSurrogate = High surrogate bits in UTF-8 sequence must not exceed 0x10 but found 0x{0}.
  
  
  
  1.21      +34 -1     jakarta-tomcat-jasper/jasper2/src/share/org/apache/jasper/resources/messages_es.properties
  
  Index: messages_es.properties
  ===================================================================
  RCS file: /home/cvs/jakarta-tomcat-jasper/jasper2/src/share/org/apache/jasper/resources/messages_es.properties,v
  retrieving revision 1.20
  retrieving revision 1.21
  diff -u -r1.20 -r1.21
  --- messages_es.properties	4 Nov 2002 19:15:33 -0000	1.20
  +++ messages_es.properties	6 Nov 2002 20:14:20 -0000	1.21
  @@ -223,3 +223,36 @@
   jasper.error.emptybodycontent.nonempty=
   jsp.error.tagfile.var_name_given_equals_attr_name=
   jsp.error.useBean.noSession=
  +jsp.error.xml.encodingByteOrderUnsupported=
  +jsp.error.xml.encodingDeclInvalid=
  +jsp.error.xml.encodingDeclRequired=
  +jsp.error.xml.morePseudoAttributes=
  +jsp.error.xml.noMorePseudoAttributes=
  +jsp.error.xml.versionInfoRequired=
  +jsp.error.xml.xmlDeclUnterminated=
  +jsp.error.xml.reservedPITarget=
  +jsp.error.xml.spaceRequiredInPI=
  +jsp.error.xml.invalidCharInContent=
  +jsp.error.xml.spaceRequiredBeforeStandalone=
  +jsp.error.xml.sdDeclInvalid=
  +jsp.error.xml.invalidCharInPI=
  +jsp.error.xml.versionNotSupported=
  +jsp.error.xml.pseudoAttrNameExpected=
  +jsp.error.xml.expectedByte=
  +jsp.error.xml.invalidByte=
  +jsp.error.xml.operationNotSupported=
  +jsp.error.xml.invalidHighSurrogate=
  +jsp.error.xml.invalidASCII=
  +jsp.error.xml.spaceRequiredBeforeEncodingInXMLDecl=
  +jsp.error.xml.spaceRequiredBeforeEncodingInTextDecl=
  +jsp.error.xml.spaceRequiredBeforeVersionInTextDecl=
  +jsp.error.xml.spaceRequiredBeforeVersionInXMLDecl=
  +jsp.error.xml.eqRequiredInXMLDecl=
  +jsp.error.xml.eqRequiredInTextDecl=
  +jsp.error.xml.quoteRequiredInTextDecl=
  +jsp.error.xml.quoteRequiredInXMLDecl=
  +jsp.error.xml.invalidCharInTextDecl=
  +jsp.error.xml.invalidCharInXMLDecl=
  +jsp.error.xml.closeQuoteMissingInTextDecl=
  +jsp.error.xml.closeQuoteMissingInXMLDecl=
  +jsp.error.xml.invalidHighSurrogate=
  
  
  
  1.21      +34 -1     jakarta-tomcat-jasper/jasper2/src/share/org/apache/jasper/resources/messages_ja.properties
  
  Index: messages_ja.properties
  ===================================================================
  RCS file: /home/cvs/jakarta-tomcat-jasper/jasper2/src/share/org/apache/jasper/resources/messages_ja.properties,v
  retrieving revision 1.20
  retrieving revision 1.21
  diff -u -r1.20 -r1.21
  --- messages_ja.properties	4 Nov 2002 19:15:34 -0000	1.20
  +++ messages_ja.properties	6 Nov 2002 20:14:20 -0000	1.21
  @@ -254,3 +254,36 @@
   jasper.error.emptybodycontent.nonempty=
   jsp.error.tagfile.var_name_given_equals_attr_name=
   jsp.error.useBean.noSession=
  +jsp.error.xml.encodingByteOrderUnsupported=
  +jsp.error.xml.encodingDeclInvalid=
  +jsp.error.xml.encodingDeclRequired=
  +jsp.error.xml.morePseudoAttributes=
  +jsp.error.xml.noMorePseudoAttributes=
  +jsp.error.xml.versionInfoRequired=
  +jsp.error.xml.xmlDeclUnterminated=
  +jsp.error.xml.reservedPITarget=
  +jsp.error.xml.spaceRequiredInPI=
  +jsp.error.xml.invalidCharInContent=
  +jsp.error.xml.spaceRequiredBeforeStandalone=
  +jsp.error.xml.sdDeclInvalid=
  +jsp.error.xml.invalidCharInPI=
  +jsp.error.xml.versionNotSupported=
  +jsp.error.xml.pseudoAttrNameExpected=
  +jsp.error.xml.expectedByte=
  +jsp.error.xml.invalidByte=
  +jsp.error.xml.operationNotSupported=
  +jsp.error.xml.invalidHighSurrogate=
  +jsp.error.xml.invalidASCII=
  +jsp.error.xml.spaceRequiredBeforeEncodingInXMLDecl=
  +jsp.error.xml.spaceRequiredBeforeEncodingInTextDecl=
  +jsp.error.xml.spaceRequiredBeforeVersionInTextDecl=
  +jsp.error.xml.spaceRequiredBeforeVersionInXMLDecl=
  +jsp.error.xml.eqRequiredInXMLDecl=
  +jsp.error.xml.eqRequiredInTextDecl=
  +jsp.error.xml.quoteRequiredInTextDecl=
  +jsp.error.xml.quoteRequiredInXMLDecl=
  +jsp.error.xml.invalidCharInTextDecl=
  +jsp.error.xml.invalidCharInXMLDecl=
  +jsp.error.xml.closeQuoteMissingInTextDecl=
  +jsp.error.xml.closeQuoteMissingInXMLDecl=
  +jsp.error.xml.invalidHighSurrogate=
  
  
  
  1.1                  jakarta-tomcat-jasper/jasper2/src/share/org/apache/jasper/xmlparser/ASCIIReader.java
  
  Index: ASCIIReader.java
  ===================================================================
  /*
   * The Apache Software License, Version 1.1
   *
   *
   * Copyright (c) 2000-2002 The Apache Software Foundation.  All rights
   * reserved.
   *
   * Redistribution and use in source and binary forms, with or without
   * modification, are permitted provided that the following conditions
   * are met:
   *
   * 1. Redistributions of source code must retain the above copyright
   *    notice, this list of conditions and the following disclaimer.
   *
   * 2. Redistributions in binary form must reproduce the above copyright
   *    notice, this list of conditions and the following disclaimer in
   *    the documentation and/or other materials provided with the
   *    distribution.
   *
   * 3. The end-user documentation included with the redistribution,
   *    if any, must include the following acknowledgment:
   *       "This product includes software developed by the
   *        Apache Software Foundation (http://www.apache.org/)."
   *    Alternately, this acknowledgment may appear in the software itself,
   *    if and wherever such third-party acknowledgments normally appear.
   *
   * 4. The names "Xerces" and "Apache Software Foundation" must
   *    not be used to endorse or promote products derived from this
   *    software without prior written permission. For written
   *    permission, please contact apache@apache.org.
   *
   * 5. Products derived from this software may not be called "Apache",
   *    nor may "Apache" appear in their name, without prior written
   *    permission of the Apache Software Foundation.
   *
   * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
   * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
   * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
   * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
   * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
   * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
   * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
   * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
   * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   * SUCH DAMAGE.
   * ====================================================================
   *
   * This software consists of voluntary contributions made by many
   * individuals on behalf of the Apache Software Foundation and was
   * originally based on software copyright (c) 1999, International
   * Business Machines, Inc., http://www.apache.org.  For more
   * information on the Apache Software Foundation, please see
   * <http://www.apache.org/>.
   */
  
  package org.apache.jasper.xmlparser;
  
  import java.io.InputStream;
  import java.io.IOException;
  import java.io.Reader;
  import org.apache.jasper.compiler.ErrorDispatcher;
  
  /**
   * A simple ASCII byte reader. This is an optimized reader for reading
   * byte streams that only contain 7-bit ASCII characters.
   *
   * @author Andy Clark, IBM
   *
   * @version $Id: ASCIIReader.java,v 1.1 2002/11/06 20:14:20 luehe Exp $
   */
  public class ASCIIReader
      extends Reader {

      //
      // Constants
      //

      /** Default byte buffer size (2048). */
      public static final int DEFAULT_BUFFER_SIZE = 2048;

      //
      // Data
      //

      /** Input stream the bytes are pulled from. */
      protected InputStream fInputStream;

      /** Scratch byte buffer used by the bulk read. */
      protected byte[] fBuffer;

      /** Supplies the localized text of the "not 7-bit ASCII" error. */
      private ErrorDispatcher err;

      //
      // Constructors
      //

      /**
       * Constructs an ASCII reader from the specified input stream
       * and buffer size.
       *
       * @param inputStream The input stream.
       * @param size        The size of the internal byte buffer; it also
       *                    caps how many characters one bulk read returns.
       * @param err         The error dispatcher used to build the message
       *                    of the exception thrown for non-ASCII bytes.
       */
      public ASCIIReader(InputStream inputStream, int size,
                         ErrorDispatcher err) {
          fInputStream = inputStream;
          fBuffer = new byte[size];
          this.err = err;
      }

      //
      // Reader methods
      //

      /**
       * Reads a single character by reading a single byte from the
       * underlying stream.
       *
       * @return     The character read, as an integer in the range 0 to 127
       *             (<tt>0x00-0x7f</tt>), or -1 if the end of the stream has
       *             been reached
       *
       * @exception  IOException  If an I/O error occurs or the byte read is
       *                          not 7-bit ASCII (0x80 or above)
       */
      public int read() throws IOException {
          int b0 = fInputStream.read();
          // 0x80 itself already has the high bit set, so the test must be
          // inclusive; -1 (end of stream) falls through untouched.
          if (b0 >= 0x80) {
              throw new IOException(err.getString("jsp.error.xml.invalidASCII",
                                                  Integer.toString(b0)));
          }
          return b0;
      } // read():int

      /**
       * Reads characters into a portion of an array. At most
       * <code>fBuffer.length</code> characters are delivered per call.
       *
       * @param      ch     Destination buffer
       * @param      offset Offset at which to start storing characters
       * @param      length Maximum number of characters to read
       *
       * @return     The number of characters read, or -1 if the end of the
       *             stream has been reached
       *
       * @exception  IOException  If an I/O error occurs or one of the bytes
       *                          read is not 7-bit ASCII
       */
      public int read(char ch[], int offset, int length) throws IOException {
          if (length > fBuffer.length) {
              length = fBuffer.length;
          }
          int count = fInputStream.read(fBuffer, 0, length);
          for (int i = 0; i < count; i++) {
              int b0 = fBuffer[i];
              // Java bytes are signed: every value with the high bit set
              // (0x80-0xff) shows up here as a negative number.
              if (b0 < 0) {
                  throw new IOException(err.getString("jsp.error.xml.invalidASCII",
                                                      Integer.toString(b0 & 0xff)));
              }
              ch[offset + i] = (char)b0;
          }
          return count;
      } // read(char[],int,int)

      /**
       * Skips characters. One ASCII character is one byte, so the request
       * is handed to the underlying stream unchanged.
       *
       * @param  n  The number of characters to skip
       *
       * @return    The number of characters actually skipped
       *
       * @exception  IOException  If an I/O error occurs
       */
      public long skip(long n) throws IOException {
          return fInputStream.skip(n);
      } // skip(long):long

      /**
       * Tells whether this stream is ready to be read.
       *
       * @return Always false in this implementation; returning false does
       * not guarantee that the next read will block.
       *
       * @exception  IOException  If an I/O error occurs
       */
      public boolean ready() throws IOException {
          return false;
      } // ready()

      /**
       * Tells whether this stream supports the mark() operation, which is
       * the case exactly when the underlying input stream supports it.
       */
      public boolean markSupported() {
          return fInputStream.markSupported();
      } // markSupported()

      /**
       * Marks the present position in the stream by marking the underlying
       * input stream.
       *
       * @param  readAheadLimit  Limit on the number of characters that may be
       *                         read while still preserving the mark.
       *
       * @exception  IOException  If an I/O error occurs
       */
      public void mark(int readAheadLimit) throws IOException {
          fInputStream.mark(readAheadLimit);
      } // mark(int)

      /**
       * Resets the stream by resetting the underlying input stream.
       *
       * @exception  IOException  If the underlying stream cannot be reset
       *                          or some other I/O error occurs
       */
      public void reset() throws IOException {
          fInputStream.reset();
      } // reset()

      /**
       * Closes the underlying input stream.
       *
       * @exception  IOException  If an I/O error occurs
       */
      public void close() throws IOException {
          fInputStream.close();
      } // close()

  } // class ASCIIReader
  
  
  
  1.1                  jakarta-tomcat-jasper/jasper2/src/share/org/apache/jasper/xmlparser/UCSReader.java
  
  Index: UCSReader.java
  ===================================================================
  /*
   * The Apache Software License, Version 1.1
   *
   *
   * Copyright (c) 2000-2002 The Apache Software Foundation.  All rights
   * reserved.
   *
   * Redistribution and use in source and binary forms, with or without
   * modification, are permitted provided that the following conditions
   * are met:
   *
   * 1. Redistributions of source code must retain the above copyright
   *    notice, this list of conditions and the following disclaimer.
   *
   * 2. Redistributions in binary form must reproduce the above copyright
   *    notice, this list of conditions and the following disclaimer in
   *    the documentation and/or other materials provided with the
   *    distribution.
   *
   * 3. The end-user documentation included with the redistribution,
   *    if any, must include the following acknowledgment:
   *       "This product includes software developed by the
   *        Apache Software Foundation (http://www.apache.org/)."
   *    Alternately, this acknowledgment may appear in the software itself,
   *    if and wherever such third-party acknowledgments normally appear.
   *
   * 4. The names "Xerces" and "Apache Software Foundation" must
   *    not be used to endorse or promote products derived from this
   *    software without prior written permission. For written
   *    permission, please contact apache@apache.org.
   *
   * 5. Products derived from this software may not be called "Apache",
   *    nor may "Apache" appear in their name, without prior written
   *    permission of the Apache Software Foundation.
   *
   * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
   * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
   * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
   * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
   * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
   * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
   * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
   * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
   * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   * SUCH DAMAGE.
   * ====================================================================
   *
   * This software consists of voluntary contributions made by many
   * individuals on behalf of the Apache Software Foundation and was
   * originally based on software copyright (c) 1999, International
   * Business Machines, Inc., http://www.apache.org.  For more
   * information on the Apache Software Foundation, please see
   * <http://www.apache.org/>.
   */
  
  package org.apache.jasper.xmlparser;
  
  import java.io.InputStream;
  import java.io.IOException;
  import java.io.Reader;
  
  /** 
   * Reader for UCS-2 and UCS-4 encodings.
   * (i.e., encodings from ISO-10646-UCS-(2|4)).
   *
   * @author Neil Graham, IBM
   *
   * @version $Id: UCSReader.java,v 1.1 2002/11/06 20:14:20 luehe Exp $
   */
  public class UCSReader extends Reader {

      //
      // Constants
      //

      /** Default byte buffer size (8192, larger than that of ASCIIReader
       * since it's reasonable to surmise that the average UCS-4-encoded
       * file should be 4 times as large as the average ASCII-encoded file).
       */
      public static final int DEFAULT_BUFFER_SIZE = 8192;

      /** UCS-2 (two bytes per character), least significant byte first. */
      public static final short UCS2LE = 1;
      /** UCS-2 (two bytes per character), most significant byte first. */
      public static final short UCS2BE = 2;
      /** UCS-4 (four bytes per character), least significant byte first. */
      public static final short UCS4LE = 4;
      /** UCS-4 (four bytes per character), most significant byte first. */
      public static final short UCS4BE = 8;

      //
      // Data
      //

      /** Input stream the bytes are pulled from. */
      protected InputStream fInputStream;

      /** Scratch byte buffer used by the bulk read. */
      protected byte[] fBuffer;

      /** One of UCS2LE, UCS2BE, UCS4LE or UCS4BE. */
      protected short fEncoding;

      //
      // Constructors
      //

      /**
       * Constructs a UCS reader from the specified input stream
       * using the default buffer size.  The Endian-ness and whether this is
       * UCS-2 or UCS-4 needs also to be known in advance.
       *
       * @param inputStream The input stream.
       * @param encoding One of UCS2LE, UCS2BE, UCS4LE or UCS4BE.
       */
      public UCSReader(InputStream inputStream, short encoding) {
          this(inputStream, DEFAULT_BUFFER_SIZE, encoding);
      } // <init>(InputStream, short)

      /**
       * Constructs a UCS reader from the specified input stream
       * and buffer size.  The Endian-ness and whether this is
       * UCS-2 or UCS-4 needs also to be known in advance.
       *
       * @param inputStream The input stream.
       * @param size        The size of the internal byte buffer.
       * @param encoding One of UCS2LE, UCS2BE, UCS4LE or UCS4BE.
       */
      public UCSReader(InputStream inputStream, int size, short encoding) {
          fInputStream = inputStream;
          fBuffer = new byte[size];
          fEncoding = encoding;
      } // <init>(InputStream,int,short)

      //
      // Reader methods
      //

      /**
       * Reads a single character (two bytes for UCS-2, four for UCS-4).
       *
       * @return     The decoded character value, or -1 if the end of the
       *             stream is reached before a complete character was read
       *
       * @exception  IOException  If an I/O error occurs
       */
      public int read() throws IOException {
          // End of stream has to be detected on the raw return value: a
          // data byte 0xff is perfectly legal (e.g. U+00FF, U+FFFE) and must
          // not be confused with the -1 sentinel.
          int b0 = fInputStream.read();
          if (b0 == -1) {
              return -1;
          }
          int b1 = fInputStream.read();
          if (b1 == -1) {
              return -1;
          }
          if (fEncoding >= 4) { // UCS-4
              int b2 = fInputStream.read();
              if (b2 == -1) {
                  return -1;
              }
              int b3 = fInputStream.read();
              if (b3 == -1) {
                  return -1;
              }
              if (fEncoding == UCS4BE) {
                  return (b0<<24)+(b1<<16)+(b2<<8)+b3;
              }
              return (b3<<24)+(b2<<16)+(b1<<8)+b0;
          }
          // UCS-2
          if (fEncoding == UCS2BE) {
              return (b0<<8)+b1;
          }
          return (b1<<8)+b0;
      } // read():int

      /**
       * Reads characters into a portion of an array.  The number of
       * characters delivered per call is capped by how many whole
       * characters fit into the internal byte buffer.  If the stream ends
       * in the middle of a character, the missing bytes are treated as
       * zero.
       *
       * @param      ch     Destination buffer
       * @param      offset Offset at which to start storing characters
       * @param      length Maximum number of characters to read
       *
       * @return     The number of characters read, or -1 if the end of the
       *             stream has been reached
       *
       * @exception  IOException  If an I/O error occurs
       * @exception  IndexOutOfBoundsException  If offset/length do not
       *             describe a valid region of <code>ch</code>
       */
      public int read(char ch[], int offset, int length) throws IOException {
          if (offset < 0 || length < 0 || length > ch.length - offset) {
              throw new IndexOutOfBoundsException();
          }
          if (length == 0) {
              return 0;
          }

          int shift = charShift();
          int width = 1 << shift;

          // Only ever request a whole number of characters that fits into
          // the buffer, so completing a partial character below can never
          // run past the end of fBuffer.
          int maxChars = fBuffer.length >> shift;
          if (maxChars == 0) {
              // Buffer cannot hold even one character; decode one directly.
              int c = read();
              if (c == -1) {
                  return -1;
              }
              ch[offset] = (char)c;
              return 1;
          }
          int charsWanted = (length < maxChars) ? length : maxChars;

          int count = fInputStream.read(fBuffer, 0, charsWanted << shift);
          if (count == -1) {
              return -1;
          }

          // The stream may hand back a byte count that stops in the middle
          // of a character; pull the remaining bytes one at a time.
          int partial = count & (width - 1);
          if (partial != 0) {
              for (int missing = width - partial; missing > 0; missing--) {
                  int b = fInputStream.read();
                  // end of input inside a character: pad with a zero byte
                  fBuffer[count++] = (b == -1) ? 0 : (byte)b;
              }
          }

          // now count is a multiple of the character width
          int numChars = count >> shift;
          int curPos = 0;
          for (int i = 0; i < numChars; i++) {
              int b0 = fBuffer[curPos++] & 0xff;
              int b1 = fBuffer[curPos++] & 0xff;
              if (fEncoding >= 4) {
                  int b2 = fBuffer[curPos++] & 0xff;
                  int b3 = fBuffer[curPos++] & 0xff;
                  // NOTE: the cast keeps only the low 16 bits, so UCS-4
                  // values above 0xFFFF are truncated rather than turned
                  // into surrogate pairs.
                  if (fEncoding == UCS4BE) {
                      ch[offset+i] = (char)((b0<<24)+(b1<<16)+(b2<<8)+b3);
                  } else {
                      ch[offset+i] = (char)((b3<<24)+(b2<<16)+(b1<<8)+b0);
                  }
              } else { // UCS-2
                  if (fEncoding == UCS2BE) {
                      ch[offset+i] = (char)((b0<<8)+b1);
                  } else {
                      ch[offset+i] = (char)((b1<<8)+b0);
                  }
              }
          }
          return numChars;
      } // read(char[],int,int)

      /**
       * Skips characters by skipping the corresponding number of bytes in
       * the underlying stream.  A partially skipped character counts as
       * one skipped character.
       *
       * @param  n  The number of characters to skip
       *
       * @return    The number of characters actually skipped
       *
       * @exception  IOException  If an I/O error occurs
       */
      public long skip(long n) throws IOException {
          // Shifting by charShift converts between characters and bytes
          // without a division.
          int shift = charShift();
          long bytesSkipped = fInputStream.skip(n << shift);
          long charsSkipped = bytesSkipped >> shift;
          if ((bytesSkipped & ((1 << shift) - 1)) != 0) {
              charsSkipped++;
          }
          return charsSkipped;
      } // skip(long):long

      /**
       * Tells whether this stream is ready to be read.
       *
       * @return Always false in this implementation; returning false does
       * not guarantee that the next read will block.
       *
       * @exception  IOException  If an I/O error occurs
       */
      public boolean ready() throws IOException {
          return false;
      } // ready()

      /**
       * Tells whether this stream supports the mark() operation, which is
       * the case exactly when the underlying input stream supports it.
       */
      public boolean markSupported() {
          return fInputStream.markSupported();
      } // markSupported()

      /**
       * Marks the present position in the stream by marking the underlying
       * input stream.
       *
       * @param  readAheadLimit  Limit on the number of characters that may be
       *                         read while still preserving the mark.
       *
       * @exception  IOException  If an I/O error occurs
       */
      public void mark(int readAheadLimit) throws IOException {
          // The caller counts characters, the underlying stream counts
          // bytes; scale the limit (saturating instead of overflowing).
          int shift = charShift();
          int byteLimit = (readAheadLimit > (Integer.MAX_VALUE >> shift))
              ? Integer.MAX_VALUE
              : (readAheadLimit << shift);
          fInputStream.mark(byteLimit);
      } // mark(int)

      /**
       * Resets the stream by resetting the underlying input stream.
       *
       * @exception  IOException  If the underlying stream cannot be reset
       *                          or some other I/O error occurs
       */
      public void reset() throws IOException {
          fInputStream.reset();
      } // reset()

      /**
       * Closes the underlying input stream.
       *
       * @exception  IOException  If an I/O error occurs
       */
      public void close() throws IOException {
          fInputStream.close();
      } // close()

      //
      // Private methods
      //

      /**
       * Returns log2 of the number of bytes per character:
       * 2 for the UCS-4 encodings, 1 for the UCS-2 encodings.
       */
      private int charShift() {
          return (fEncoding >= 4) ? 2 : 1;
      } // charShift():int

  } // class UCSReader
  
  
  
  1.1                  jakarta-tomcat-jasper/jasper2/src/share/org/apache/jasper/xmlparser/UTF8Reader.java
  
  Index: UTF8Reader.java
  ===================================================================
  /*
   * The Apache Software License, Version 1.1
   *
   *
   * Copyright (c) 2000-2002 The Apache Software Foundation.  All rights
   * reserved.
   *
   * Redistribution and use in source and binary forms, with or without
   * modification, are permitted provided that the following conditions
   * are met:
   *
   * 1. Redistributions of source code must retain the above copyright
   *    notice, this list of conditions and the following disclaimer.
   *
   * 2. Redistributions in binary form must reproduce the above copyright
   *    notice, this list of conditions and the following disclaimer in
   *    the documentation and/or other materials provided with the
   *    distribution.
   *
   * 3. The end-user documentation included with the redistribution,
   *    if any, must include the following acknowledgment:
   *       "This product includes software developed by the
   *        Apache Software Foundation (http://www.apache.org/)."
   *    Alternately, this acknowledgment may appear in the software itself,
   *    if and wherever such third-party acknowledgments normally appear.
   *
   * 4. The names "Xerces" and "Apache Software Foundation" must
   *    not be used to endorse or promote products derived from this
   *    software without prior written permission. For written
   *    permission, please contact apache@apache.org.
   *
   * 5. Products derived from this software may not be called "Apache",
   *    nor may "Apache" appear in their name, without prior written
   *    permission of the Apache Software Foundation.
   *
   * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
   * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
   * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
   * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
   * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
   * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
   * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
   * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
   * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   * SUCH DAMAGE.
   * ====================================================================
   *
   * This software consists of voluntary contributions made by many
   * individuals on behalf of the Apache Software Foundation and was
   * originally based on software copyright (c) 1999, International
   * Business Machines, Inc., http://www.apache.org.  For more
   * information on the Apache Software Foundation, please see
   * <http://www.apache.org/>.
   */
  
  package org.apache.jasper.xmlparser;
  
  import java.io.InputStream;
  import java.io.IOException;
  import java.io.Reader;
  import java.io.UTFDataFormatException;
  import org.apache.jasper.compiler.ErrorDispatcher;
  
  /**
   * @author Andy Clark, IBM
   *
   * @version $Id: UTF8Reader.java,v 1.1 2002/11/06 20:14:20 luehe Exp $
   */
  public class UTF8Reader
      extends Reader {

      //
      // Constants
      //

      /** Default byte buffer size (2048). */
      public static final int DEFAULT_BUFFER_SIZE = 2048;

      // debugging

      /** Debug read. */
      private static final boolean DEBUG_READ = false;

      //
      // Data
      //

      /** Input stream. */
      protected InputStream fInputStream;

      /** Byte buffer. */
      protected byte[] fBuffer;

      /**
       * Number of undecoded bytes saved at the start of the byte buffer.
       * A non-zero value means the previous block read stopped in front of
       * a malformed or truncated sequence; those bytes are decoded again
       * (and the error is reported) by the next read.
       */
      protected int fOffset;

      /** Pending low surrogate, or -1 if there is none. */
      private int fSurrogate = -1;

      /** Source of the localized error messages. */
      private ErrorDispatcher err;

      //
      // Constructors
      //

      /**
       * Constructs a UTF-8 reader from the specified input stream,
       * buffer size and error dispatcher.
       *
       * @param inputStream The input stream.
       * @param size        The size of the internal byte buffer.
       * @param err         The error dispatcher used to build the messages
       *                    of the exceptions thrown by this reader.
       */
      public UTF8Reader(InputStream inputStream, int size, ErrorDispatcher err) {
          fInputStream = inputStream;
          fBuffer = new byte[size];
          this.err = err;
      }

      //
      // Reader methods
      //

      /**
       * Read a single character.  This method will block until a character is
       * available, an I/O error occurs, or the end of the stream is reached.
       *
       * <p> A character outside the Basic Multilingual Plane (a four-byte
       * UTF-8 sequence) is delivered as two UTF-16 code units: this call
       * returns the high surrogate and the next call returns the low
       * surrogate.
       *
       * @return     The character read, as an integer in the range 0 to 65535
       *             (<tt>0x00-0xffff</tt>), or -1 if the end of the stream has
       *             been reached
       *
       * @exception  IOException  If an I/O error occurs, or (as a
       *                          UTFDataFormatException) if the input is not
       *                          well-formed UTF-8
       */
      public int read() throws IOException {

          // decode character
          int c = fSurrogate;
          if (fSurrogate == -1) {
              // NOTE: We use the index into the buffer if there are remaining
              //       bytes from the last block read. -Ac
              int index = 0;

              // get first byte
              int b0 = index == fOffset
                     ? fInputStream.read() : fBuffer[index++] & 0x00FF;
              if (b0 == -1) {
                  return -1;
              }

              // UTF-8:   [0xxx xxxx]
              // Unicode: [0000 0000] [0xxx xxxx]
              if (b0 < 0x80) {
                  c = (char)b0;
              }

              // UTF-8:   [110y yyyy] [10xx xxxx]
              // Unicode: [0000 0yyy] [yyxx xxxx]
              else if ((b0 & 0xE0) == 0xC0) {
                  int b1 = index == fOffset
                         ? fInputStream.read() : fBuffer[index++] & 0x00FF;
                  if (b1 == -1) {
                      expectedByte(2, 2);
                  }
                  if ((b1 & 0xC0) != 0x80) {
                      invalidByte(2, 2, b1);
                  }
                  c = ((b0 << 6) & 0x07C0) | (b1 & 0x003F);
              }

              // UTF-8:   [1110 zzzz] [10yy yyyy] [10xx xxxx]
              // Unicode: [zzzz yyyy] [yyxx xxxx]
              else if ((b0 & 0xF0) == 0xE0) {
                  int b1 = index == fOffset
                         ? fInputStream.read() : fBuffer[index++] & 0x00FF;
                  if (b1 == -1) {
                      expectedByte(2, 3);
                  }
                  if ((b1 & 0xC0) != 0x80) {
                      invalidByte(2, 3, b1);
                  }
                  int b2 = index == fOffset
                         ? fInputStream.read() : fBuffer[index++] & 0x00FF;
                  if (b2 == -1) {
                      expectedByte(3, 3);
                  }
                  if ((b2 & 0xC0) != 0x80) {
                      invalidByte(3, 3, b2);
                  }
                  c = ((b0 << 12) & 0xF000) | ((b1 << 6) & 0x0FC0) |
                      (b2 & 0x003F);
              }

              // UTF-8:   [1111 0uuu] [10uu zzzz] [10yy yyyy] [10xx xxxx]*
              // Unicode: [1101 10ww] [wwzz zzyy] (high surrogate)
              //          [1101 11yy] [yyxx xxxx] (low surrogate)
              //          * uuuuu = wwww + 1
              else if ((b0 & 0xF8) == 0xF0) {
                  int b1 = index == fOffset
                         ? fInputStream.read() : fBuffer[index++] & 0x00FF;
                  if (b1 == -1) {
                      expectedByte(2, 4);
                  }
                  if ((b1 & 0xC0) != 0x80) {
                      // the sequence is four bytes long, report it as such
                      invalidByte(2, 4, b1);
                  }
                  int b2 = index == fOffset
                         ? fInputStream.read() : fBuffer[index++] & 0x00FF;
                  if (b2 == -1) {
                      expectedByte(3, 4);
                  }
                  if ((b2 & 0xC0) != 0x80) {
                      invalidByte(3, 4, b2);
                  }
                  int b3 = index == fOffset
                         ? fInputStream.read() : fBuffer[index++] & 0x00FF;
                  if (b3 == -1) {
                      expectedByte(4, 4);
                  }
                  if ((b3 & 0xC0) != 0x80) {
                      invalidByte(4, 4, b3);
                  }
                  int uuuuu = ((b0 << 2) & 0x001C) | ((b1 >> 4) & 0x0003);
                  if (uuuuu > 0x10) {
                      invalidSurrogate(uuuuu);
                  }
                  int wwww = uuuuu - 1;
                  int hs = 0xD800 |
                           ((wwww << 6) & 0x03C0) | ((b1 << 2) & 0x003C) |
                           ((b2 >> 4) & 0x0003);
                  int ls = 0xDC00 | ((b2 << 6) & 0x03C0) | (b3 & 0x003F);
                  // return the high surrogate now, the low one on the next call
                  c = hs;
                  fSurrogate = ls;
              }

              // error
              else {
                  invalidByte(1, 1, b0);
              }
          }

          // use surrogate
          else {
              fSurrogate = -1;
          }

          // return character
          if (DEBUG_READ) {
              System.out.println("read(): 0x"+Integer.toHexString(c));
          }
          return c;

      } // read():int

      /**
       * Read characters into a portion of an array.  This method will block
       * until some input is available, an I/O error occurs, or the end of the
       * stream is reached.
       *
       * <p> When a malformed or truncated sequence is met after at least one
       * character has been decoded, the characters decoded so far are
       * returned and the offending bytes are kept; the error is then thrown
       * by the next read.
       *
       * @param      ch     Destination buffer
       * @param      offset Offset at which to start storing characters
       * @param      length Maximum number of characters to read
       *
       * @return     The number of characters read, or -1 if the end of the
       *             stream has been reached
       *
       * @exception  IOException  If an I/O error occurs, or (as a
       *                          UTFDataFormatException) if the input is not
       *                          well-formed UTF-8
       */
      public int read(char ch[], int offset, int length) throws IOException {

          // nothing requested: leave the stream and any pending state alone
          if (length == 0) {
              return 0;
          }

          // first index past the region the caller allows us to fill
          final int limit = offset + length;

          // handle surrogate: a pending low surrogate is the first character
          // delivered by this call
          int out = offset;
          if (fSurrogate != -1) {
              ch[out++] = (char)fSurrogate;
              fSurrogate = -1;
              length--;
          }

          // read bytes; "total" counts bytes available in fBuffer and must
          // not include the surrogate emitted above
          int total;
          if (fOffset == 0) {
              // adjust length to read
              if (length > fBuffer.length) {
                  length = fBuffer.length;
              }

              // perform read operation
              total = fInputStream.read(fBuffer, 0, length);
              if (total == -1) {
                  // a surrogate delivered above must still be reported
                  return out > offset ? out - offset : -1;
              }
          }

          // skip read; last character was in error
          // NOTE: Having an offset value other than zero means that there was
          //       an error in the last character read. In this case, we have
          //       skipped the read so we don't consume any bytes past the
          //       error. By signalling the error on the next block read we
          //       allow the method to return the most valid characters that
          //       it can on the previous block read. -Ac
          else {
              total = fOffset;
              fOffset = 0;
          }

          // convert bytes to characters
          for (int in = 0; in < total; in++) {
              int b0 = fBuffer[in] & 0x00FF;

              // UTF-8:   [0xxx xxxx]
              // Unicode: [0000 0000] [0xxx xxxx]
              if (b0 < 0x80) {
                  ch[out++] = (char)b0;
                  continue;
              }

              // UTF-8:   [110y yyyy] [10xx xxxx]
              // Unicode: [0000 0yyy] [yyxx xxxx]
              if ((b0 & 0xE0) == 0xC0) {
                  int b1;
                  if (++in < total) {
                      b1 = fBuffer[in] & 0x00FF;
                  }
                  else {
                      // sequence straddles the buffer end: pull from stream
                      b1 = fInputStream.read();
                      if (b1 == -1) {
                          if (out > offset) {
                              return deferBytes(out - offset, b0);
                          }
                          expectedByte(2, 2);
                      }
                  }
                  if ((b1 & 0xC0) != 0x80) {
                      if (out > offset) {
                          return deferBytes(out - offset, b0, b1);
                      }
                      invalidByte(2, 2, b1);
                  }
                  int c = ((b0 << 6) & 0x07C0) | (b1 & 0x003F);
                  ch[out++] = (char)c;
                  continue;
              }

              // UTF-8:   [1110 zzzz] [10yy yyyy] [10xx xxxx]
              // Unicode: [zzzz yyyy] [yyxx xxxx]
              if ((b0 & 0xF0) == 0xE0) {
                  int b1;
                  if (++in < total) {
                      b1 = fBuffer[in] & 0x00FF;
                  }
                  else {
                      b1 = fInputStream.read();
                      if (b1 == -1) {
                          if (out > offset) {
                              return deferBytes(out - offset, b0);
                          }
                          expectedByte(2, 3);
                      }
                  }
                  if ((b1 & 0xC0) != 0x80) {
                      if (out > offset) {
                          return deferBytes(out - offset, b0, b1);
                      }
                      invalidByte(2, 3, b1);
                  }
                  int b2;
                  if (++in < total) {
                      b2 = fBuffer[in] & 0x00FF;
                  }
                  else {
                      b2 = fInputStream.read();
                      if (b2 == -1) {
                          if (out > offset) {
                              return deferBytes(out - offset, b0, b1);
                          }
                          expectedByte(3, 3);
                      }
                  }
                  if ((b2 & 0xC0) != 0x80) {
                      if (out > offset) {
                          return deferBytes(out - offset, b0, b1, b2);
                      }
                      invalidByte(3, 3, b2);
                  }
                  int c = ((b0 << 12) & 0xF000) | ((b1 << 6) & 0x0FC0) |
                          (b2 & 0x003F);
                  ch[out++] = (char)c;
                  continue;
              }

              // UTF-8:   [1111 0uuu] [10uu zzzz] [10yy yyyy] [10xx xxxx]*
              // Unicode: [1101 10ww] [wwzz zzyy] (high surrogate)
              //          [1101 11yy] [yyxx xxxx] (low surrogate)
              //          * uuuuu = wwww + 1
              if ((b0 & 0xF8) == 0xF0) {
                  int b1;
                  if (++in < total) {
                      b1 = fBuffer[in] & 0x00FF;
                  }
                  else {
                      b1 = fInputStream.read();
                      if (b1 == -1) {
                          if (out > offset) {
                              return deferBytes(out - offset, b0);
                          }
                          expectedByte(2, 4);
                      }
                  }
                  if ((b1 & 0xC0) != 0x80) {
                      if (out > offset) {
                          return deferBytes(out - offset, b0, b1);
                      }
                      invalidByte(2, 4, b1);
                  }
                  int b2;
                  if (++in < total) {
                      b2 = fBuffer[in] & 0x00FF;
                  }
                  else {
                      b2 = fInputStream.read();
                      if (b2 == -1) {
                          if (out > offset) {
                              return deferBytes(out - offset, b0, b1);
                          }
                          expectedByte(3, 4);
                      }
                  }
                  if ((b2 & 0xC0) != 0x80) {
                      if (out > offset) {
                          return deferBytes(out - offset, b0, b1, b2);
                      }
                      invalidByte(3, 4, b2);
                  }
                  int b3;
                  if (++in < total) {
                      b3 = fBuffer[in] & 0x00FF;
                  }
                  else {
                      b3 = fInputStream.read();
                      if (b3 == -1) {
                          if (out > offset) {
                              return deferBytes(out - offset, b0, b1, b2);
                          }
                          expectedByte(4, 4);
                      }
                  }
                  if ((b3 & 0xC0) != 0x80) {
                      if (out > offset) {
                          return deferBytes(out - offset, b0, b1, b2, b3);
                      }
                      invalidByte(4, 4, b3);
                  }

                  // decode bytes into surrogate characters
                  int uuuuu = ((b0 << 2) & 0x001C) | ((b1 >> 4) & 0x0003);
                  if (uuuuu > 0x10) {
                      invalidSurrogate(uuuuu);
                  }
                  int wwww = uuuuu - 1;
                  int zzzz = b1 & 0x000F;
                  int yyyyyy = b2 & 0x003F;
                  int xxxxxx = b3 & 0x003F;
                  int hs = 0xD800 | ((wwww << 6) & 0x03C0) | (zzzz << 2) | (yyyyyy >> 4);
                  int ls = 0xDC00 | ((yyyyyy << 6) & 0x03C0) | xxxxxx;

                  // set characters; when the trailing bytes came from the
                  // stream there may be room for the high surrogate only, in
                  // which case the low one waits for the next read
                  ch[out++] = (char)hs;
                  if (out < limit) {
                      ch[out++] = (char)ls;
                  }
                  else {
                      fSurrogate = ls;
                  }
                  continue;
              }

              // error
              if (out > offset) {
                  return deferBytes(out - offset, b0);
              }
              invalidByte(1, 1, b0);
          }

          // return number of characters converted
          final int charsRead = out - offset;
          if (DEBUG_READ) {
              System.out.println("read(char[],"+offset+','+length+"): count="+charsRead);
          }
          return charsRead;

      } // read(char[],int,int)

      /**
       * Skip characters.  This method will block until some characters are
       * available, an I/O error occurs, or the end of the stream is reached.
       *
       * <p> Skipping is done by decoding into a scratch buffer, so malformed
       * input is reported exactly as it would be by a read.
       *
       * @param  n  The number of characters to skip; values less than or
       *            equal to zero skip nothing
       *
       * @return    The number of characters actually skipped
       *
       * @exception  IOException  If an I/O error occurs
       */
      public long skip(long n) throws IOException {

          long remaining = n;
          final char[] ch = new char[fBuffer.length];
          while (remaining > 0) {
              int length = ch.length < remaining ? ch.length : (int)remaining;
              int count = read(ch, 0, length);
              if (count <= 0) {
                  // end of stream (or nothing could be read)
                  break;
              }
              remaining -= count;
          }

          return n - remaining;

      } // skip(long):long

      /**
       * Tell whether this stream is ready to be read.
       *
       * @return This implementation always returns false, i.e. it never
       * guarantees that the next read() will not block.  Note that returning
       * false does not guarantee that the next read will block.
       *
       * @exception  IOException  If an I/O error occurs
       */
      public boolean ready() throws IOException {
          return false;
      } // ready()

      /**
       * Tell whether this stream supports the mark() operation.
       *
       * @return Always false; mark() is not supported by this reader.
       */
      public boolean markSupported() {
          return false;
      } // markSupported()

      /**
       * Mark the present position in the stream.  This reader does not
       * support marking, so this method always throws.
       *
       * @param  readAheadLimit  Ignored.
       *
       * @exception  IOException  Always, since mark() is not supported
       */
      public void mark(int readAheadLimit) throws IOException {
          throw new IOException(err.getString("jsp.error.xml.operationNotSupported",
                                              "mark()", "UTF-8"));
      } // mark(int)

      /**
       * Reset the reader.  This implementation only discards the decoder
       * state (saved undecoded bytes and any pending low surrogate); the
       * underlying input stream is not repositioned.
       *
       * @exception  IOException  Declared for compatibility with Reader;
       *                          not thrown by this implementation
       */
      public void reset() throws IOException {
          fOffset = 0;
          fSurrogate = -1;
      } // reset()

      /**
       * Close the stream by closing the underlying input stream.
       *
       * @exception  IOException  If an I/O error occurs
       */
      public void close() throws IOException {
          fInputStream.close();
      } // close()

      //
      // Private methods
      //

      /**
       * Saves one undecoded byte at the start of the byte buffer so that
       * the next read decodes it again, and passes through charsRead.
       */
      private int deferBytes(int charsRead, int b0) {
          fBuffer[0] = (byte)b0;
          fOffset = 1;
          return charsRead;
      } // deferBytes(int,int):int

      /** Two-byte variant of {@link #deferBytes(int,int)}. */
      private int deferBytes(int charsRead, int b0, int b1) {
          fBuffer[0] = (byte)b0;
          fBuffer[1] = (byte)b1;
          fOffset = 2;
          return charsRead;
      } // deferBytes(int,int,int):int

      /** Three-byte variant of {@link #deferBytes(int,int)}. */
      private int deferBytes(int charsRead, int b0, int b1, int b2) {
          fBuffer[0] = (byte)b0;
          fBuffer[1] = (byte)b1;
          fBuffer[2] = (byte)b2;
          fOffset = 3;
          return charsRead;
      } // deferBytes(int,int,int,int):int

      /** Four-byte variant of {@link #deferBytes(int,int)}. */
      private int deferBytes(int charsRead, int b0, int b1, int b2, int b3) {
          fBuffer[0] = (byte)b0;
          fBuffer[1] = (byte)b1;
          fBuffer[2] = (byte)b2;
          fBuffer[3] = (byte)b3;
          fOffset = 4;
          return charsRead;
      } // deferBytes(int,int,int,int,int):int

      /**
       * Throws an exception for a missing byte: the stream ended where byte
       * number <code>position</code> of a <code>count</code>-byte sequence
       * was expected.
       */
      private void expectedByte(int position, int count)
          throws UTFDataFormatException {

          throw new UTFDataFormatException(
                  err.getString("jsp.error.xml.expectedByte",
                                Integer.toString(position),
                                Integer.toString(count)));

      } // expectedByte(int,int)

      /**
       * Throws an exception for an invalid byte at <code>position</code> of
       * a <code>count</code>-byte sequence.  The offending byte value
       * <code>c</code> is accepted for the callers' convenience but is not
       * part of the message.
       */
      private void invalidByte(int position, int count, int c)
          throws UTFDataFormatException {

          throw new UTFDataFormatException(
                  err.getString("jsp.error.xml.invalidByte",
                                Integer.toString(position),
                                Integer.toString(count)));
      } // invalidByte(int,int,int)

      /**
       * Throws an exception for invalid surrogate bits, i.e. a four-byte
       * sequence whose <code>uuuuu</code> bits are greater than 0x10.
       */
      private void invalidSurrogate(int uuuuu) throws UTFDataFormatException {

          throw new UTFDataFormatException(
                  err.getString("jsp.error.xml.invalidHighSurrogate",
                                Integer.toHexString(uuuuu)));
      } // invalidSurrogate(int)

  } // class UTF8Reader
  
  
  
  1.1                  jakarta-tomcat-jasper/jasper2/src/share/org/apache/jasper/xmlparser/XMLEncodingDetector.java
  
  Index: XMLEncodingDetector.java
  ===================================================================
  /*
   * The Apache Software License, Version 1.1
   *
   *
   * Copyright (c) 2000-2002 The Apache Software Foundation.  All rights
   * reserved.
   *
   * Redistribution and use in source and binary forms, with or without
   * modification, are permitted provided that the following conditions
   * are met:
   *
   * 1. Redistributions of source code must retain the above copyright
   *    notice, this list of conditions and the following disclaimer.
   *
   * 2. Redistributions in binary form must reproduce the above copyright
   *    notice, this list of conditions and the following disclaimer in
   *    the documentation and/or other materials provided with the
   *    distribution.
   *
   * 3. The end-user documentation included with the redistribution,
   *    if any, must include the following acknowledgment:
   *       "This product includes software developed by the
   *        Apache Software Foundation (http://www.apache.org/)."
   *    Alternately, this acknowledgment may appear in the software itself,
   *    if and wherever such third-party acknowledgments normally appear.
   *
   * 4. The names "Xerces" and "Apache Software Foundation" must
   *    not be used to endorse or promote products derived from this
   *    software without prior written permission. For written
   *    permission, please contact apache@apache.org.
   *
   * 5. Products derived from this software may not be called "Apache",
   *    nor may "Apache" appear in their name, without prior written
   *    permission of the Apache Software Foundation.
   *
   * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
   * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
   * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
   * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
   * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
   * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
   * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
   * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
   * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   * SUCH DAMAGE.
   * ====================================================================
   *
   * This software consists of voluntary contributions made by many
   * individuals on behalf of the Apache Software Foundation and was
   * originally based on software copyright (c) 1999, International
   * Business Machines, Inc., http://www.apache.org.  For more
   * information on the Apache Software Foundation, please see
   * <http://www.apache.org/>.
   */
  
  package org.apache.jasper.xmlparser;
  
  import java.io.EOFException;
  import java.io.InputStream;
  import java.io.InputStreamReader;
  import java.io.IOException;
  import java.io.Reader;
  import java.util.Locale;
  import java.util.jar.JarFile;
  
  import org.apache.jasper.JasperException;
  import org.apache.jasper.JspCompilationContext;
  import org.apache.jasper.compiler.ErrorDispatcher;
  import org.apache.jasper.compiler.JspUtil;
  
  import org.apache.xerces.util.EncodingMap;
  import org.apache.xerces.util.SymbolTable;
  import org.apache.xerces.util.XMLChar;
  import org.apache.xerces.util.XMLStringBuffer;
  import org.apache.xerces.xni.XMLString;
  
  public class XMLEncodingDetector {
      
      private InputStream stream;
      private String encoding;
      private boolean isEncodingSetInProlog;
      private Boolean isBigEndian;
      private Reader reader;
      
      // org.apache.xerces.impl.XMLEntityManager fields
      public static final int DEFAULT_BUFFER_SIZE = 2048;
      public static final int DEFAULT_XMLDECL_BUFFER_SIZE = 64;
      private boolean fAllowJavaEncodings;
      private SymbolTable fSymbolTable;
      private XMLEncodingDetector fCurrentEntity;
      private int fBufferSize = DEFAULT_BUFFER_SIZE;
      
      // org.apache.xerces.impl.XMLEntityManager.ScannedEntity fields
      private int lineNumber = 1;
      private int columnNumber = 1;
      private boolean literal;
      private char[] ch = new char[DEFAULT_BUFFER_SIZE];
      private int position;
      private int count;
      private boolean mayReadChunks = false;
      
      // org.apache.xerces.impl.XMLScanner fields
      private XMLString fString = new XMLString();    
      private XMLStringBuffer fStringBuffer = new XMLStringBuffer();
      private XMLStringBuffer fStringBuffer2 = new XMLStringBuffer();
      private final static String fVersionSymbol = "version";
      private final static String fEncodingSymbol = "encoding";
      private final static String fStandaloneSymbol = "standalone";
      
      // org.apache.xerces.impl.XMLDocumentFragmentScannerImpl fields
      private int fMarkupDepth = 0;
      private String[] fStrings = new String[3];
  
      private ErrorDispatcher err;
      
      /**
       * Autodetects the encoding of the XML document supplied by the given
       * input stream.
       *
       * Encoding autodetection is done according to the XML 1.0 specification,
       * Appendix F.1: Detection Without External Encoding Information.
       *
       * @param in The input stream to read
       * @param err The error dispatcher
       *
       * @return Two-element array, where the first element (of type
       * java.lang.String) contains the name of the autodetected encoding, and
       * the second element (of type java.lang.Boolean) specifies whether the 
       * encoding was specified by the encoding attribute of an XML declaration
       * (prolog).
       *
       * @exception IOException If creating the initial reader or scanning
       * the XML declaration fails with an I/O error
       * @exception JasperException If creating the initial reader or scanning
       * the XML declaration reports an error
       */
      public static Object[] getEncoding(InputStream in, ErrorDispatcher err)
          throws IOException, JasperException
      {
          XMLEncodingDetector detector = new XMLEncodingDetector(in, err);
          detector.createInitialReader();
          detector.scanXMLDecl();

          // use the shared Boolean constants rather than allocating a new
          // wrapper object on every call
          Boolean setInProlog = detector.isEncodingSetInProlog
              ? Boolean.TRUE : Boolean.FALSE;
          return new Object[] { detector.encoding, setInProlog };
      }
  
      /**
       * Autodetects the encoding of the resource with the given name.  The
       * resource is opened through JspUtil.getInputStream, handed to
       * {@link #getEncoding(InputStream, ErrorDispatcher)} and closed again
       * before this method returns, whether detection succeeds or fails.
       *
       * @param fname   Name of the resource, passed to JspUtil.getInputStream
       * @param jarFile JAR file argument passed to JspUtil.getInputStream
       * @param ctxt    Compilation context passed to JspUtil.getInputStream
       * @param err     The error dispatcher
       *
       * @return The two-element array described at
       * {@link #getEncoding(InputStream, ErrorDispatcher)}
       *
       * @exception IOException If opening, reading or closing the stream fails
       * @exception JasperException If opening the stream or detecting the
       * encoding reports an error
       */
      public static Object[] getEncoding(String fname, JarFile jarFile,
                                         JspCompilationContext ctxt,
                                         ErrorDispatcher err)
          throws IOException, JasperException
      {
          InputStream inStream = JspUtil.getInputStream(fname, jarFile,
                                                        ctxt, err);
          try {
              return getEncoding(inStream, err);
          } finally {
              // release the stream even when detection throws
              inStream.close();
          }
      }
  	
      /**
       * Creates a detector that reads from the given stream and uses the
       * given error dispatcher.  A fresh symbol table is allocated, and the
       * detector registers itself as its own current entity.
       *
       * @param stream The input stream to examine
       * @param err    The error dispatcher
       */
      public XMLEncodingDetector(InputStream stream, ErrorDispatcher err) {
          this.err = err;
          this.stream = stream;
          this.fCurrentEntity = this;
          this.fSymbolTable = new SymbolTable();
      }
      
      // stub method: intentionally empty.
      // NOTE(review): presumably kept so that the scanning code adapted from
      // Xerces can still call it -- confirm against the callers.
      void endEntity() {
      }
      
      // Adapted from:
      // org.apache.xerces.impl.XMLEntityManager.startEntity()
      /**
       * Wraps the input stream, guesses the encoding from its first four
       * bytes and creates the reader used for further scanning.  The results
       * are stored in the fields stream, encoding, isBigEndian and reader.
       *
       * @exception IOException If reading from, resetting or skipping on the
       * stream fails
       * @exception JasperException Declared by this method; none is thrown by
       * the statements visible here, so it presumably comes from the helper
       * methods -- confirm
       */
      private void createInitialReader() throws IOException, JasperException {
  
  	// wrap this stream in RewindableInputStream
  	// NOTE(review): RewindableInputStream is declared outside this view;
  	// the reset() below presumably rewinds to the first byte -- confirm.
  	stream = new RewindableInputStream(stream);
  
  	// perform auto-detect of encoding if necessary
  	if (encoding == null) {
  	    // read first four bytes and determine encoding
  	    final byte[] b4 = new byte[4];
  	    int count = 0;
  	    // NOTE(review): the loop never stops early. At end of stream
  	    // read() returns -1, which is stored as the byte 0xFF, and count
  	    // still reaches 4. A source shorter than four bytes is therefore
  	    // examined with 0xFF padding, and the else branch below can never
  	    // be taken.
  	    for (; count<4; count++ ) {
  		b4[count] = (byte)stream.read();
  	    }
  	    if (count == 4) {
  		// encodingDesc[0] is the encoding name, encodingDesc[1] the
  		// byte order
  		Object [] encodingDesc = getEncodingName(b4, count);
  		encoding = (String)(encodingDesc[0]);
  		isBigEndian = (Boolean)(encodingDesc[1]);
  
  		// give the four bytes back before the reader is created
  		stream.reset();
  		// Special case UTF-8 files with BOM created by Microsoft
  		// tools. It's more efficient to consume the BOM than make
  		// the reader perform extra checks. -Ac
  		if (count > 2 && encoding.equals("UTF-8")) {
  		    int b0 = b4[0] & 0xFF;
  		    int b1 = b4[1] & 0xFF;
  		    int b2 = b4[2] & 0xFF;
  		    if (b0 == 0xEF && b1 == 0xBB && b2 == 0xBF) {
  			// ignore first three bytes...
  			// (the number of bytes actually skipped is not checked)
  			stream.skip(3);
  		    }
  		}
  		reader = createReader(stream, encoding, isBigEndian);
  	    } else {
  		// unreachable as written, see the note on the loop above
  		reader = createReader(stream, encoding, isBigEndian);
  	    }
  	}
      }
  
      // Adapted from:
      // org.apache.xerces.impl.XMLEntityManager.createReader
      /**
       * Creates a reader capable of reading the given input stream in
       * the specified encoding.
       *
       * @param inputStream  The input stream.
       * @param encoding     The encoding name that the input stream is
       *                     encoded using. If the user has specified that
       *                     Java encoding names are allowed, then the
       *                     encoding name may be a Java encoding name;
       *                     otherwise, it is an ianaEncoding name.
       * @param isBigEndian   For encodings (like UCS-4), whose names cannot
       *                      specify a byte order, this tells whether the order
       *                      is bigEndian. null means unknown or not relevant.
       *
       * @return Returns a reader.
       */
      private Reader createReader(InputStream inputStream, String encoding,
                                  Boolean isBigEndian)
              throws IOException, JasperException {

          // no encoding given: fall back to UTF-8
          if (encoding == null) {
              encoding = "UTF-8";
          }

          // Dedicated readers exist for a few encodings; they are selected
          // by the upper-cased encoding name.
          final String upperName = encoding.toUpperCase(Locale.ENGLISH);
          if (upperName.equals("UTF-8")) {
              return new UTF8Reader(inputStream, fBufferSize, err);
          }
          if (upperName.equals("US-ASCII")) {
              return new ASCIIReader(inputStream, fBufferSize, err);
          }
          if (upperName.equals("ISO-10646-UCS-4")) {
              if (isBigEndian == null) {
                  // the name alone does not tell the byte order
                  err.jspError("jsp.error.xml.encodingByteOrderUnsupported",
                               encoding);
              } else if (isBigEndian.booleanValue()) {
                  return new UCSReader(inputStream, UCSReader.UCS4BE);
              } else {
                  return new UCSReader(inputStream, UCSReader.UCS4LE);
              }
          }
          if (upperName.equals("ISO-10646-UCS-2")) {
              if (isBigEndian == null) {
                  err.jspError("jsp.error.xml.encodingByteOrderUnsupported",
                               encoding);
              } else if (isBigEndian.booleanValue()) {
                  // a known byte order should never happen with this
                  // encoding...
                  return new UCSReader(inputStream, UCSReader.UCS2BE);
              } else {
                  return new UCSReader(inputStream, UCSReader.UCS2LE);
              }
          }

          // Validate the name. It is accepted only if it is a valid IANA
          // name and, when Java encoding names are allowed, also a valid
          // Java encoding name.
          final boolean validIANA = XMLChar.isValidIANAEncoding(encoding);
          final boolean validJava = XMLChar.isValidJavaEncoding(encoding);
          if (!(validIANA && (!fAllowJavaEncodings || validJava))) {
              err.jspError("jsp.error.xml.encodingDeclInvalid", encoding);
              // NOTE: AndyH suggested that, on failure, we use ISO Latin 1
              //       because every byte is a valid ISO Latin 1 character.
              //       It may not translate correctly but if we failed on
              //       the encoding anyway, then we're expecting the content
              //       of the document to be bad. This will just prevent an
              //       invalid UTF-8 sequence to be detected. This is only
              //       important when continue-after-fatal-error is turned
              //       on. -Ac
              encoding = "ISO-8859-1";
          }

          // Fall back to a plain Java reader. The lookup uses the
          // upper-cased name computed before any ISO-8859-1 substitution.
          String javaEncoding = EncodingMap.getIANA2JavaMapping(upperName);
          if (javaEncoding == null) {
              if (!fAllowJavaEncodings) {
                  err.jspError("jsp.error.xml.encodingDeclInvalid", encoding);
                  // same ISO Latin 1 fallback as above
                  javaEncoding = "ISO8859_1";
              } else {
                  javaEncoding = encoding;
              }
          }
          return new InputStreamReader(inputStream, javaEncoding);

      } // createReader(InputStream,String, Boolean): Reader
  
      // Adapted from:
      // org.apache.xerces.impl.XMLEntityManager.getEncodingName
      /**
       * Returns the IANA encoding name that is auto-detected from
       * the bytes specified, with the endian-ness of that encoding where
       * appropriate.
       *
       * @param b4    The first four bytes of the input.
       * @param count The number of bytes actually read.
       * @return a 2-element array:  the first element, an IANA-encoding string,
       *  the second element a Boolean which is true iff the document is big
       *  endian, false if it's little-endian, and null if the distinction isn't
       *  relevant.
       */
      private Object[] getEncodingName(byte[] b4, int count) {

          // Byte order is reported with the shared Boolean.TRUE/FALSE
          // constants rather than freshly allocated Boolean objects.

          // not even enough bytes for a UTF-16 byte order mark
          if (count < 2) {
              return new Object[] {"UTF-8", null};
          }

          // UTF-16, with BOM
          int b0 = b4[0] & 0xFF;
          int b1 = b4[1] & 0xFF;
          if (b0 == 0xFE && b1 == 0xFF) {
              // UTF-16, big-endian
              return new Object[] {"UTF-16BE", Boolean.TRUE};
          }
          if (b0 == 0xFF && b1 == 0xFE) {
              // UTF-16, little-endian
              return new Object[] {"UTF-16LE", Boolean.FALSE};
          }

          // default to UTF-8 if we don't have enough bytes to make a
          // good determination of the encoding
          if (count < 3) {
              return new Object[] {"UTF-8", null};
          }

          // UTF-8 with a BOM
          int b2 = b4[2] & 0xFF;
          if (b0 == 0xEF && b1 == 0xBB && b2 == 0xBF) {
              return new Object[] {"UTF-8", null};
          }

          // default to UTF-8 if we don't have enough bytes to make a
          // good determination of the encoding
          if (count < 4) {
              return new Object[] {"UTF-8", null};
          }

          // Other encodings: 0x3C is '<' and 0x3F is '?', so the patterns
          // below are the start of "<?" in the respective encodings.
          int b3 = b4[3] & 0xFF;
          if (b0 == 0x00 && b1 == 0x00 && b2 == 0x00 && b3 == 0x3C) {
              // UCS-4, big endian (1234)
              return new Object[] {"ISO-10646-UCS-4", Boolean.TRUE};
          }
          if (b0 == 0x3C && b1 == 0x00 && b2 == 0x00 && b3 == 0x00) {
              // UCS-4, little endian (4321)
              return new Object[] {"ISO-10646-UCS-4", Boolean.FALSE};
          }
          if (b0 == 0x00 && b1 == 0x00 && b2 == 0x3C && b3 == 0x00) {
              // UCS-4, unusual octet order (2143)
              // REVISIT: What should this be?
              return new Object[] {"ISO-10646-UCS-4", null};
          }
          if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x00) {
              // UCS-4, unusual octet order (3412)
              // REVISIT: What should this be?
              return new Object[] {"ISO-10646-UCS-4", null};
          }
          if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x3F) {
              // UTF-16, big-endian, no BOM
              // (or could turn out to be UCS-2...
              // REVISIT: What should this be?
              return new Object[] {"UTF-16BE", Boolean.TRUE};
          }
          if (b0 == 0x3C && b1 == 0x00 && b2 == 0x3F && b3 == 0x00) {
              // UTF-16, little-endian, no BOM
              // (or could turn out to be UCS-2...
              return new Object[] {"UTF-16LE", Boolean.FALSE};
          }
          if (b0 == 0x4C && b1 == 0x6F && b2 == 0xA7 && b3 == 0x94) {
              // EBCDIC
              // a la xerces1, return CP037 instead of EBCDIC here
              return new Object[] {"CP037", null};
          }

          // default encoding
          return new Object[] {"UTF-8", null};

      }
  
      // Adapted from:
      // org.apache.xerces.impl.XMLEntityManager.EntityScanner.isExternal
      /** Returns true if the current entity being scanned is external. */
      public boolean isExternal() {
  	// Hard-coded: this implementation always reports the entity as
  	// external, which turns on the '\r' handling in the scan methods.
  	return true;
      }
  
      // Adapted from:
      // org.apache.xerces.impl.XMLEntityManager.EntityScanner.peekChar
      /**
       * Returns the next character on the input.
       * <p>
       * <strong>Note:</strong> The character is <em>not</em> consumed.
       *
       * @throws IOException  Thrown if i/o error occurs.
       * @throws EOFException Thrown on end of file.
       */
      public int peekChar() throws IOException {

          // refill the buffer once everything in it has been consumed
          if (fCurrentEntity.count == fCurrentEntity.position) {
              load(0, true);
          }

          // look at the current character; the position is left untouched
          final int next = fCurrentEntity.ch[fCurrentEntity.position];

          // for an external entity a carriage return is reported as a
          // line feed
          if (fCurrentEntity.isExternal() && next == '\r') {
              return '\n';
          }
          return next;

      } // peekChar():int
      
      // Adapted from:
      // org.apache.xerces.impl.XMLEntityManager.EntityScanner.scanChar
      /**
       * Returns the next character on the input.
       * <p>
       * <strong>Note:</strong> The character is consumed.
       *
       * @throws IOException  Thrown if i/o error occurs.
       * @throws EOFException Thrown on end of file.
       */
      public int scanChar() throws IOException {

          // refill the buffer once everything in it has been consumed
          if (fCurrentEntity.count == fCurrentEntity.position) {
              load(0, true);
          }

          // consume one character
          int c = fCurrentEntity.ch[fCurrentEntity.position++];

          // a carriage return counts as a newline only for external entities
          final boolean externalCR =
              (c == '\r') && fCurrentEntity.isExternal();

          if (c == '\n' || externalCR) {
              fCurrentEntity.lineNumber++;
              fCurrentEntity.columnNumber = 1;

              // The newline was the last buffered character: keep it in
              // slot 0 and load more input right behind it.
              if (fCurrentEntity.count == fCurrentEntity.position) {
                  fCurrentEntity.ch[0] = (char) c;
                  load(1, false);
              }

              if (externalCR) {
                  // swallow the line feed of a CR LF pair, then report the
                  // newline as a single '\n'
                  char following =
                      fCurrentEntity.ch[fCurrentEntity.position++];
                  if (following != '\n') {
                      fCurrentEntity.position--;
                  }
                  c = '\n';
              }
          }

          // return character that was scanned
          fCurrentEntity.columnNumber++;
          return c;

      }
  
      // Adapted from:
      // org.apache.xerces.impl.XMLEntityManager.EntityScanner.scanName
      /**
       * Returns a string matching the Name production appearing immediately
       * on the input as a symbol, or null if no Name string is present.
       * <p>
       * <strong>Note:</strong> The Name characters are consumed.
       * <p>
       * <strong>Note:</strong> The string returned must be a symbol. The
       * SymbolTable can be used for this purpose.
       *
       * @throws IOException  Thrown if i/o error occurs.
       * @throws EOFException Thrown on end of file.
       *
       * @see org.apache.xerces.util.SymbolTable
       * @see org.apache.xerces.util.XMLChar#isName
       * @see org.apache.xerces.util.XMLChar#isNameStart
       */
      public String scanName() throws IOException {
  	
  	// load more characters, if needed
  	if (fCurrentEntity.position == fCurrentEntity.count) {
  	    load(0, true);
  	}
  	
  	// scan name; offset marks where the name starts in the buffer
  	int offset = fCurrentEntity.position;
  	if (XMLChar.isNameStart(fCurrentEntity.ch[offset])) {
  	    if (++fCurrentEntity.position == fCurrentEntity.count) {
  		// The name-start character was the last one in the buffer:
  		// move it to slot 0 and load more input right behind it.
  		fCurrentEntity.ch[0] = fCurrentEntity.ch[offset];
  		offset = 0;
  		if (load(1, false)) {
  		    // load() reported the end of the entity, so the name is
  		    // just this one character
  		    fCurrentEntity.columnNumber++;
  		    String symbol = fSymbolTable.addSymbol(fCurrentEntity.ch,
  							   0, 1);
  		    return symbol;
  		}
  	    }
  	    // consume the remaining name characters
  	    while (XMLChar.isName(fCurrentEntity.ch[fCurrentEntity.position])) {
  		if (++fCurrentEntity.position == fCurrentEntity.count) {
  		    // Buffer exhausted in the middle of the name: the part
  		    // scanned so far must end up at the front of the buffer
  		    // before more input is loaded behind it.
  		    int length = fCurrentEntity.position - offset;
  		    if (length == fBufferSize) {
  			// bad luck we have to resize our buffer
  			// (the partial name is as long as fBufferSize, so the
  			// buffer is replaced by one of twice the size)
  			char[] tmp = new char[fBufferSize * 2];
  			System.arraycopy(fCurrentEntity.ch, offset,
  					 tmp, 0, length);
  			fCurrentEntity.ch = tmp;
  			fBufferSize *= 2;
  		    } else {
  			// enough room: shift the partial name to the front
  			System.arraycopy(fCurrentEntity.ch, offset,
  					 fCurrentEntity.ch, 0, length);
  		    }
  		    offset = 0;
  		    if (load(length, false)) {
  			// end of the entity: the name ends here
  			break;
  		    }
  		}
  	    }
  	}
  	// length is 0 when the first character was not a name-start character
  	int length = fCurrentEntity.position - offset;
  	fCurrentEntity.columnNumber += length;
  
  	// return name (null if no name characters were found)
  	String symbol = null;
  	if (length > 0) {
  	    symbol = fSymbolTable.addSymbol(fCurrentEntity.ch, offset, length);
  	}
  	return symbol;
  	
      }
  
      // Adapted from:
      // org.apache.xerces.impl.XMLEntityManager.EntityScanner.scanLiteral
      /**
       * Scans a range of attribute value data, setting the fields of the
       * XMLString structure, appropriately.
       * <p>
       * <strong>Note:</strong> The characters are consumed.
       * <p>
       * <strong>Note:</strong> This method does not guarantee to return
       * the longest run of attribute value data. This method may return
       * before the quote character due to reaching the end of the input
       * buffer or any other reason.
       * <p>
       * <strong>Note:</strong> The fields contained in the XMLString
       * structure are not guaranteed to remain valid upon subsequent calls
       * to the entity scanner. Therefore, the caller is responsible for
       * immediately using the returned character data or making a copy of
       * the character data.
       *
       * @param quote   The quote character that signifies the end of the
       *                attribute value data.
       * @param content The content structure to fill.
       *
       * @return Returns the next character on the input, if known. This
       *         value may be -1 but this does <em>not</em> designate
       *         end of file.
       *
       * @throws IOException  Thrown if i/o error occurs.
       * @throws EOFException Thrown on end of file.
       */
      public int scanLiteral(int quote, XMLString content)
  	throws IOException {
  
  	// load more characters, if needed
  	if (fCurrentEntity.position == fCurrentEntity.count) {
  	    load(0, true);
  	} else if (fCurrentEntity.position == fCurrentEntity.count - 1) {
  	    // Only one unread character is left: move it to slot 0 and load
  	    // more input right behind it.
  	    fCurrentEntity.ch[0] = fCurrentEntity.ch[fCurrentEntity.count - 1];
  	    load(1, false);
  	    fCurrentEntity.position = 0;
  	}
  
  	// normalize newlines: a run of newline characters at the start of
  	// the data is consumed here and rewritten in the buffer as '\n'
  	int offset = fCurrentEntity.position;
  	int c = fCurrentEntity.ch[offset];
  	int newlines = 0;
  	boolean external = fCurrentEntity.isExternal();
  	if (c == '\n' || (c == '\r' && external)) {
  	    do {
  		c = fCurrentEntity.ch[fCurrentEntity.position++];
  		if (c == '\r' && external) {
  		    newlines++;
  		    fCurrentEntity.lineNumber++;
  		    fCurrentEntity.columnNumber = 1;
  		    if (fCurrentEntity.position == fCurrentEntity.count) {
  			// buffer exhausted: restart at the front, leaving
  			// room for the newlines seen so far
  			offset = 0;
  			fCurrentEntity.position = newlines;
  			if (load(newlines, false)) {
  			    break;
  			}
  		    }
  		    if (fCurrentEntity.ch[fCurrentEntity.position] == '\n') {
  			// CR LF pair: skip the LF and move offset forward so
  			// that the pair yields a single '\n'
  			fCurrentEntity.position++;
  			offset++;
  		    }
  		    /*** NEWLINE NORMALIZATION ***/
  		    else {
  			// NOTE(review): a CR that is not followed by LF
  			// increments newlines a second time - confirm that
  			// this is intended.
  			newlines++;
  		    }
  		    /***/
  		}
  		else if (c == '\n') {
  		    newlines++;
  		    fCurrentEntity.lineNumber++;
  		    fCurrentEntity.columnNumber = 1;
  		    if (fCurrentEntity.position == fCurrentEntity.count) {
  			// buffer exhausted: restart at the front, leaving
  			// room for the newlines seen so far
  			offset = 0;
  			fCurrentEntity.position = newlines;
  			if (load(newlines, false)) {
  			    break;
  			}
  		    }
  		    /*** NEWLINE NORMALIZATION ***
  			 if (fCurrentEntity.ch[fCurrentEntity.position] == '\r'
  			 && external) {
  			 fCurrentEntity.position++;
  			 offset++;
  			 }
  			 /***/
  		}
  		else {
  		    // not a newline: un-read it and leave the loop
  		    fCurrentEntity.position--;
  		    break;
  		}
  	    } while (fCurrentEntity.position < fCurrentEntity.count - 1);
  	    // overwrite the consumed run with '\n' characters
  	    for (int i = offset; i < fCurrentEntity.position; i++) {
  		fCurrentEntity.ch[i] = '\n';
  	    }
  	    int length = fCurrentEntity.position - offset;
  	    if (fCurrentEntity.position == fCurrentEntity.count - 1) {
  		// only one buffered character is left: hand back the
  		// newlines now; -1 tells the caller the next character is
  		// not known
  		content.setValues(fCurrentEntity.ch, offset, length);
  		return -1;
  	    }
  	}
  
  	// scan literal value: stop (without consuming) at the quote, at '%',
  	// or at a character XMLChar.isContent() rejects. The quote does not
  	// stop the scan when fCurrentEntity.literal is set and the entity
  	// is not external.
  	while (fCurrentEntity.position < fCurrentEntity.count) {
  	    c = fCurrentEntity.ch[fCurrentEntity.position++];
  	    if ((c == quote &&
  		 (!fCurrentEntity.literal || external))
  		|| c == '%' || !XMLChar.isContent(c)) {
  		fCurrentEntity.position--;
  		break;
  	    }
  	}
  	int length = fCurrentEntity.position - offset;
  	fCurrentEntity.columnNumber += length - newlines;
  	content.setValues(fCurrentEntity.ch, offset, length);
  
  	// return next character (-1 when the buffer is exhausted)
  	if (fCurrentEntity.position != fCurrentEntity.count) {
  	    c = fCurrentEntity.ch[fCurrentEntity.position];
  	    // NOTE: We don't want to accidentally signal the
  	    //       end of the literal if we're expanding an
  	    //       entity appearing in the literal. -Ac
  	    if (c == quote && fCurrentEntity.literal) {
  		c = -1;
  	    }
  	}
  	else {
  	    c = -1;
  	}
  	return c;
  
      }
  
      /**
       * Scans a range of character data up to the specified delimiter,
       * setting the fields of the XMLString structure, appropriately.
       * <p>
       * <strong>Note:</strong> The characters are consumed.
       * <p>
       * <strong>Note:</strong> This assumes that the internal buffer is
       * at least the same size, or bigger, than the length of the delimiter
       * and that the delimiter contains at least one character.
       * <p>
       * <strong>Note:</strong> This method does not guarantee to return
       * the longest run of character data. This method may return before
       * the delimiter due to reaching the end of the input buffer or any
       * other reason.
       * <p>
       * <strong>Note:</strong> The fields contained in the XMLString
       * structure are not guaranteed to remain valid upon subsequent calls
       * to the entity scanner. Therefore, the caller is responsible for
       * immediately using the returned character data or making a copy of
       * the character data.
       *
       * @param delimiter The string that signifies the end of the character
       *                  data to be scanned.
       * @param buffer    The buffer that the scanned data is appended to.
       *
       * @return Returns true if there is more data to scan, false otherwise.
       *
       * @throws IOException  Thrown if i/o error occurs.
       * @throws EOFException Thrown on end of file.
       */
      public boolean scanData(String delimiter, XMLStringBuffer buffer)
  	throws IOException {
  
  	// done becomes true once the complete delimiter has been matched
  	boolean done = false;
  	int delimLen = delimiter.length();
  	char charAt0 = delimiter.charAt(0);
  	boolean external = fCurrentEntity.isExternal();
  	do {
      
  	    // load more characters, if needed
      
  	    if (fCurrentEntity.position == fCurrentEntity.count) {
  		load(0, true);
  	    }
  	    else if (fCurrentEntity.position >= fCurrentEntity.count - delimLen) {
  		// No more than delimLen unread characters are left: move
  		// them to the front of the buffer and load more input
  		// behind them.
  		System.arraycopy(fCurrentEntity.ch, fCurrentEntity.position,
  				 fCurrentEntity.ch, 0, fCurrentEntity.count - fCurrentEntity.position);
  		load(fCurrentEntity.count - fCurrentEntity.position, false);
  		fCurrentEntity.position = 0;
  	    } 
  	    if (fCurrentEntity.position >= fCurrentEntity.count - delimLen) {
  		// something must be wrong with the input: e.g., file ends an
  		// unterminated comment
  		// Append what is left and report that there is no more data.
  		int length = fCurrentEntity.count - fCurrentEntity.position;
  		buffer.append (fCurrentEntity.ch, fCurrentEntity.position,
  			       length); 
  		// NOTE(review): this adds count, not length, to the column
  		// number - confirm that this is intended.
  		fCurrentEntity.columnNumber += fCurrentEntity.count;
  		fCurrentEntity.position = fCurrentEntity.count;
  		load(0,true);
  		return false;
  	    }
      
  	    // normalize newlines: a run of newline characters at the start
  	    // of the data is consumed here and rewritten in the buffer as
  	    // '\n'
  	    int offset = fCurrentEntity.position;
  	    int c = fCurrentEntity.ch[offset];
  	    int newlines = 0;
  	    if (c == '\n' || (c == '\r' && external)) {
  		do {
  		    c = fCurrentEntity.ch[fCurrentEntity.position++];
  		    if (c == '\r' && external) {
  			newlines++;
  			fCurrentEntity.lineNumber++;
  			fCurrentEntity.columnNumber = 1;
  			if (fCurrentEntity.position == fCurrentEntity.count) {
  			    // buffer exhausted: restart at the front,
  			    // leaving room for the newlines seen so far
  			    offset = 0;
  			    fCurrentEntity.position = newlines;
  			    if (load(newlines, false)) {
  				break;
  			    }
  			}
  			if (fCurrentEntity.ch[fCurrentEntity.position] == '\n') {
  			    // CR LF pair: skip the LF and move offset
  			    // forward so that the pair yields one '\n'
  			    fCurrentEntity.position++;
  			    offset++;
  			}
  			/*** NEWLINE NORMALIZATION ***/
  			else {
  			    newlines++;
  			}
  		    }
  		    else if (c == '\n') {
  			newlines++;
  			fCurrentEntity.lineNumber++;
  			fCurrentEntity.columnNumber = 1;
  			if (fCurrentEntity.position == fCurrentEntity.count) {
  			    offset = 0;
  			    fCurrentEntity.position = newlines;
  			    // NOTE(review): count is reset only in this
  			    // branch, not in the '\r' branch above -
  			    // confirm that the difference is intended.
  			    fCurrentEntity.count = newlines;
  			    if (load(newlines, false)) {
  				break;
  			    }
  			}
  		    }
  		    else {
  			// not a newline: un-read it and leave the loop
  			fCurrentEntity.position--;
  			break;
  		    }
  		} while (fCurrentEntity.position < fCurrentEntity.count - 1);
  		// overwrite the consumed run with '\n' characters
  		for (int i = offset; i < fCurrentEntity.position; i++) {
  		    fCurrentEntity.ch[i] = '\n';
  		}
  		int length = fCurrentEntity.position - offset;
  		if (fCurrentEntity.position == fCurrentEntity.count - 1) {
  		    // only one buffered character is left: hand back the
  		    // newlines and let the caller call again
  		    buffer.append(fCurrentEntity.ch, offset, length);
  		    return true;
  		}
  	    }
      
  	    // iterate over buffer looking for delimiter
  	OUTER: while (fCurrentEntity.position < fCurrentEntity.count) {
  	    c = fCurrentEntity.ch[fCurrentEntity.position++];
  	    if (c == charAt0) {
  		// looks like we just hit the delimiter
  		int delimOffset = fCurrentEntity.position - 1;
  		for (int i = 1; i < delimLen; i++) {
  		    if (fCurrentEntity.position == fCurrentEntity.count) {
  			// ran out of buffer inside a possible delimiter:
  			// back up to its first character and stop
  			// scanning this buffer
  			fCurrentEntity.position -= i;
  			break OUTER;
  		    }
  		    c = fCurrentEntity.ch[fCurrentEntity.position++];
  		    if (delimiter.charAt(i) != c) {
  			// mismatch: un-read this character and go on
  			fCurrentEntity.position--;
  			break;
  		    }
  		}
  		if (fCurrentEntity.position == delimOffset + delimLen) {
  		    // the whole delimiter matched
  		    done = true;
  		    break;
  		}
  	    }
  	    else if (c == '\n' || (external && c == '\r')) {
  		// leave the newline for the normalization step of the next
  		// pass through the outer loop
  		fCurrentEntity.position--;
  		break;
  	    }
  	    else if (XMLChar.isInvalid(c)) {
  		// stop in front of the invalid character and return what
  		// has been scanned so far
  		fCurrentEntity.position--;
  		int length = fCurrentEntity.position - offset;
  		fCurrentEntity.columnNumber += length - newlines;
  		buffer.append(fCurrentEntity.ch, offset, length); 
  		return true;
  	    }
  	}
  	    int length = fCurrentEntity.position - offset;
  	    fCurrentEntity.columnNumber += length - newlines;
  	    if (done) {
  		// the delimiter is consumed but not appended
  		length -= delimLen;
  	    }
  	    buffer.append (fCurrentEntity.ch, offset, length);
      
  	    // return true if string was skipped
  	} while (!done);
  	// the loop only ends once done is true, so this returns false
  	return !done;
  
      }
  
      // Adapted from:
      // org.apache.xerces.impl.XMLEntityManager.EntityScanner.skipChar
      /**
       * Skips a character appearing immediately on the input.
       * <p>
       * <strong>Note:</strong> The character is consumed only if it matches
       * the specified character.
       *
       * @param c The character to skip.
       *
       * @return Returns true if the character was skipped.
       *
       * @throws IOException  Thrown if i/o error occurs.
       * @throws EOFException Thrown on end of file.
       */
      public boolean skipChar(int c) throws IOException {

          // refill the buffer once everything in it has been consumed
          if (fCurrentEntity.count == fCurrentEntity.position) {
              load(0, true);
          }

          final int current = fCurrentEntity.ch[fCurrentEntity.position];

          if (current != c) {
              // The only other acceptable case: a line feed was requested
              // and an external entity has a carriage return here.
              final boolean crStandsForLf =
                  c == '\n' && current == '\r' && fCurrentEntity.isExternal();
              if (!crStandsForLf) {
                  // character was not skipped
                  return false;
              }

              // handle newlines
              // NOTE(review): this test is made before position is moved
              // past the '\r' - confirm the intended ordering.
              if (fCurrentEntity.count == fCurrentEntity.position) {
                  fCurrentEntity.ch[0] = (char) current;
                  load(1, false);
              }
              fCurrentEntity.position++;
              // also swallow the line feed of a CR LF pair
              if (fCurrentEntity.ch[fCurrentEntity.position] == '\n') {
                  fCurrentEntity.position++;
              }
              fCurrentEntity.lineNumber++;
              fCurrentEntity.columnNumber = 1;
              return true;
          }

          // exact match: consume it and update the line/column bookkeeping
          fCurrentEntity.position++;
          if (c == '\n') {
              fCurrentEntity.lineNumber++;
              fCurrentEntity.columnNumber = 1;
          } else {
              fCurrentEntity.columnNumber++;
          }
          return true;

      }
  
      // Adapted from:
      // org.apache.xerces.impl.XMLEntityManager.EntityScanner.skipSpaces
      /**
       * Skips space characters appearing immediately on the input.
       * <p>
       * <strong>Note:</strong> The characters are consumed only if they are
       * space characters.
       *
       * @return Returns true if at least one space character was skipped.
       *
       * @throws IOException  Thrown if i/o error occurs.
       * @throws EOFException Thrown on end of file.
       *
       * @see org.apache.xerces.util.XMLChar#isSpace
       */
      public boolean skipSpaces() throws IOException {
  
  	// load more characters, if needed
  	if (fCurrentEntity.position == fCurrentEntity.count) {
  	    load(0, true);
  	}
  
  	// skip spaces
  	int c = fCurrentEntity.ch[fCurrentEntity.position];
  	if (XMLChar.isSpace(c)) {
  	    boolean external = fCurrentEntity.isExternal();
  	    // each pass handles the space character currently held in c
  	    do {
  		boolean entityChanged = false;
  		// handle newlines
  		if (c == '\n' || (external && c == '\r')) {
  		    fCurrentEntity.lineNumber++;
  		    fCurrentEntity.columnNumber = 1;
  		    if (fCurrentEntity.position == fCurrentEntity.count - 1) {
  			// The newline is the last buffered character: keep
  			// it in slot 0 and load more input behind it.
  			fCurrentEntity.ch[0] = (char)c;
  			entityChanged = load(1, true);
  			if (!entityChanged)
                                  // the load change the position to be 1,
                                  // need to restore it when entity not changed
  			    fCurrentEntity.position = 0;
  		    }
  		    if (c == '\r' && external) {
  			// REVISIT: Does this need to be updated to fix the
  			//          #x0D ^#x0A newline normalization problem? -Ac
  			// step onto the line feed of a CR LF pair, or step
  			// back if the next character is not a line feed
  			if (fCurrentEntity.ch[++fCurrentEntity.position] != '\n') {
  			    fCurrentEntity.position--;
  			}
  		    }
  		    /*** NEWLINE NORMALIZATION ***
  			 else {
  			 if (fCurrentEntity.ch[fCurrentEntity.position + 1] == '\r'
  			 && external) {
  			 fCurrentEntity.position++;
  			 }
  			 }
  			 /***/
  		}
  		else {
  		    fCurrentEntity.columnNumber++;
  		}
  		// load more characters, if needed
  		// (position is advanced past the character just handled,
  		// unless load() reported an entity change)
  		if (!entityChanged)
  		    fCurrentEntity.position++;
  		if (fCurrentEntity.position == fCurrentEntity.count) {
  		    load(0, true);
  		}
  	    } while (XMLChar.isSpace(c = fCurrentEntity.ch[fCurrentEntity.position]));
  	    return true;
  	}
  
  	// no spaces were found
  	return false;
  
      }
  
      /**
       * Skips the specified string appearing immediately on the input.
       * <p>
       * <strong>Note:</strong> The characters are consumed only if they
       * match the specified string.
       *
       * @param s The string to skip.
       *
       * @return Returns true if the string was skipped.
       *
       * @throws IOException  Thrown if i/o error occurs.
       * @throws EOFException Thrown on end of file.
       */
      public boolean skipString(String s) throws IOException {

          // refill the buffer once everything in it has been consumed
          if (fCurrentEntity.count == fCurrentEntity.position) {
              load(0, true);
          }

          // Compare character by character; "matched" counts the
          // characters of s that have been consumed so far.
          final int total = s.length();
          int matched = 0;
          while (matched < total) {
              char actual = fCurrentEntity.ch[fCurrentEntity.position++];
              if (actual != s.charAt(matched)) {
                  // mismatch: give back everything consumed by this call
                  fCurrentEntity.position -= matched + 1;
                  return false;
              }
              matched++;

              // Buffer exhausted with more of s still to compare: move the
              // matched prefix to the front and load input behind it.
              if (matched < total
                      && fCurrentEntity.position == fCurrentEntity.count) {
                  System.arraycopy(fCurrentEntity.ch,
                                   fCurrentEntity.count - matched,
                                   fCurrentEntity.ch, 0, matched);
                  // REVISIT: Can a string to be skipped cross an
                  //          entity boundary? -Ac
                  if (load(matched, false)) {
                      fCurrentEntity.position -= matched;
                      return false;
                  }
              }
          }

          // the whole string matched and stays consumed
          fCurrentEntity.columnNumber += total;
          return true;

      }
  
      // Adapted from:
      // org.apache.xerces.impl.XMLEntityManager.EntityScanner.load
      /**
       * Loads a chunk of text.
       *
       * @param offset       The offset into the character buffer to
       *                     read the next batch of characters.
       * @param changeEntity True if the load should change entities
       *                     at the end of the entity, otherwise leave
       *                     the current entity in place and the entity
       *                     boundary will be signaled by the return
       *                     value.
       *
       * @return Returns true if the entity changed as a result of this
       *          load operation.
       */
      final boolean load(int offset, boolean changeEntity)
              throws IOException {

          // Read characters into the buffer, starting at offset. When
          // chunked reads are not allowed, at most
          // DEFAULT_XMLDECL_BUFFER_SIZE characters are requested.
          int length = fCurrentEntity.mayReadChunks
                  ? (fCurrentEntity.ch.length - offset)
                  : (DEFAULT_XMLDECL_BUFFER_SIZE);
          int count = fCurrentEntity.reader.read(fCurrentEntity.ch, offset,
                                                 length);

          // Characters were read (or nothing was available right now):
          // reset count and position. A read of zero characters leaves
          // both untouched.
          if (count != -1) {
              if (count != 0) {
                  fCurrentEntity.count = count + offset;
                  fCurrentEntity.position = offset;
              }
              return false;
          }

          // end of this entity
          fCurrentEntity.count = offset;
          fCurrentEntity.position = offset;
          if (changeEntity) {
              endEntity();
              if (fCurrentEntity == null) {
                  throw new EOFException();
              }
              // handle the trailing edges
              if (fCurrentEntity.position == fCurrentEntity.count) {
                  // Reload WITHOUT requesting another entity change. If
                  // endEntity() leaves the same, exhausted entity in
                  // place, re-entering with changeEntity == true would hit
                  // end of input again and recurse without bound.
                  load(0, false);
              }
          }

          return true;

      }
  
      // Adapted from:
      // org.apache.xerces.impl.XMLEntityManager.RewindableInputStream
      /**
       * This class wraps the byte inputstreams we're presented with.
       * We need it because java.io.InputStreams don't provide
       * functionality to reread processed bytes, and they have a habit
       * of reading more than one character when you call their read()
       * methods.  This means that, once we discover the true (declared)
       * encoding of a document, we can neither backtrack to read the
       * whole doc again nor start reading where we are with a new
       * reader.
       *
       * This class allows rewinding an inputStream by allowing a mark
       * to be set, and the stream reset to that position.  <strong>The
       * class assumes that it needs to read one character per
       * invocation when its read() method is invoked, but uses the
       * underlying InputStream's read(byte[], offset, length) method--it
       * won't buffer data read this way!</strong>
       *
       * @author Neil Graham, IBM
       * @author Glenn Marcy, IBM
       */
      private final class RewindableInputStream extends InputStream {
  
          private InputStream fInputStream;
          private byte[] fData;
          private int fStartOffset;
          private int fEndOffset;
          private int fOffset;
          private int fLength;
          private int fMark;
  
          public RewindableInputStream(InputStream is) {
              fData = new byte[DEFAULT_XMLDECL_BUFFER_SIZE];
              fInputStream = is;
              fStartOffset = 0;
              fEndOffset = -1;
              fOffset = 0;
              fLength = 0;
              fMark = 0;
          }
  
          public void setStartOffset(int offset) {
              fStartOffset = offset;
          }
  
          public void rewind() {
              fOffset = fStartOffset;
          }
  
          public int read() throws IOException {
              int b = 0;
              if (fOffset < fLength) {
                  return fData[fOffset++] & 0xff;
              }
              if (fOffset == fEndOffset) {
                  return -1;
              }
              if (fOffset == fData.length) {
                  byte[] newData = new byte[fOffset << 1];
                  System.arraycopy(fData, 0, newData, 0, fOffset);
                  fData = newData;
              }
              b = fInputStream.read();
              if (b == -1) {
                  fEndOffset = fOffset;
                  return -1;
              }
              fData[fLength++] = (byte)b;
              fOffset++;
              return b & 0xff;
          }
  
          public int read(byte[] b, int off, int len) throws IOException {
              int bytesLeft = fLength - fOffset;
              if (bytesLeft == 0) {
                  if (fOffset == fEndOffset) {
                      return -1;
                  }
                  // better get some more for the voracious reader...
                  if (fCurrentEntity.mayReadChunks) {
                      return fInputStream.read(b, off, len);
                  }
                  int returnedVal = read();
                  if (returnedVal == -1) {
                      fEndOffset = fOffset;
                      return -1;
                  }
                  b[off] = (byte)returnedVal;
                  return 1;
              }
              if (len < bytesLeft) {
                  if (len <= 0) {
                      return 0;
                  }
              }
              else {
                  len = bytesLeft;
              }
              if (b != null) {
                  System.arraycopy(fData, fOffset, b, off, len);
              }
              fOffset += len;
              return len;
          }
  
          public long skip(long n)
              throws IOException
          {
              int bytesLeft;
              if (n <= 0) {
                  return 0;
              }
              bytesLeft = fLength - fOffset;
              if (bytesLeft == 0) {
                  if (fOffset == fEndOffset) {
                      return 0;
                  }
                  return fInputStream.skip(n);
              }
              if (n <= bytesLeft) {
                  fOffset += n;
                  return n;
              }
              fOffset += bytesLeft;
              if (fOffset == fEndOffset) {
                  return bytesLeft;
              }
              n -= bytesLeft;
  	    /*
  	     * In a manner of speaking, when this class isn't permitting more
  	     * than one byte at a time to be read, it is "blocking".  The
  	     * available() method should indicate how much can be read without
  	     * blocking, so while we're in this mode, it should only indicate
  	     * that bytes in its buffer are available; otherwise, the result of
  	     * available() on the underlying InputStream is appropriate.
  	     */
              return fInputStream.skip(n) + bytesLeft;
          }
  
          public int available() throws IOException {
              int bytesLeft = fLength - fOffset;
              if (bytesLeft == 0) {
                  if (fOffset == fEndOffset) {
                      return -1;
                  }
                  return fCurrentEntity.mayReadChunks ? fInputStream.available()
  		    : 0;
              }
              return bytesLeft;
          }
  
          public void mark(int howMuch) {
              fMark = fOffset;
          }
  
          public void reset() {
              fOffset = fMark;
          }
  
          public boolean markSupported() {
              return true;
          }
  
          public void close() throws IOException {
              if (fInputStream != null) {
                  fInputStream.close();
                  fInputStream = null;
              }
          }
      } // end of RewindableInputStream class
  
      // Adapted from:
      // org.apache.xerces.impl.XMLDocumentScannerImpl.dispatch
      /**
       * Scans what follows a leading "&lt;?xml", if the input starts with it.
       * <p>
       * When "&lt;?xml" is matched, the markup depth is incremented and one
       * of two things is scanned: a processing instruction whose target
       * merely starts with "xml" (e.g. "xmlfoo"), or a standard XML
       * declaration.  When it is not matched, nothing else is done.
       */
      private void scanXMLDecl() throws IOException, JasperException {

          // not an XML declaration (nor an "xml..." PI): nothing to scan
          if (!skipString("<?xml")) {
              return;
          }
          fMarkupDepth++;

          // standard XML declaration: "<?xml" is not followed by a name char
          if (!XMLChar.isName(peekChar())) {
              scanXMLDeclOrTextDecl(false);
              return;
          }

          // NOTE: special case where document starts with a PI
          //       whose name starts with "xml" (e.g. "xmlfoo");
          //       collect the remainder of the target name
          fStringBuffer.clear();
          fStringBuffer.append("xml");
          while (XMLChar.isName(peekChar())) {
              fStringBuffer.append((char)scanChar());
          }
          String piTarget = fSymbolTable.addSymbol(fStringBuffer.ch,
                                                   fStringBuffer.offset,
                                                   fStringBuffer.length);
          scanPIData(piTarget, fString);
      }
      
      // Adapted from:
      // org.apache.xerces.impl.XMLDocumentFragmentScannerImpl.scanXMLDeclOrTextDecl
      /**
       * Scans an XML or text declaration and records the encoding it
       * declares, if any.
       * <p>
       * <pre>
       * [23] XMLDecl ::= '&lt;?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
       * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
       * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' |  "'" EncName "'" )
       * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
       * [32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'")
       *                 | ('"' ('yes' | 'no') '"'))
       *
       * [77] TextDecl ::= '&lt;?xml' VersionInfo? EncodingDecl S? '?>'
       * </pre>
       * <p>
       * The pseudo-attribute values are collected in fStrings; the markup
       * depth is decremented once the declaration has been scanned.
       *
       * @param scanningTextDecl True if a text declaration is to
       *                         be scanned instead of an XML
       *                         declaration.
       */
      private void scanXMLDeclOrTextDecl(boolean scanningTextDecl) 
          throws IOException, JasperException {

          // delegate the actual scanning; results land in fStrings
          scanXMLDeclOrTextDecl(scanningTextDecl, fStrings);
          fMarkupDepth--;

          // slot 1 of the result array holds the encoding pseudo-attribute
          String declaredEncoding = fStrings[1];
          if (declaredEncoding == null) {
              return;
          }

          // remember the encoding declared in the prolog
          encoding = declaredEncoding;
          isEncodingSetInProlog = true;
      }
  
      // Adapted from:
      // org.apache.xerces.impl.XMLScanner.scanXMLDeclOrTextDecl
      /**
       * Scans the pseudo-attributes of an XML or text declaration, up to
       * and including the closing "?>", and returns their values through
       * the given array.
       * <p>
       * <pre>
       * [23] XMLDecl ::= '&lt;?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
       * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
       * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' |  "'" EncName "'" )
       * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
       * [32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'")
       *                 | ('"' ('yes' | 'no') '"'))
       *
       * [77] TextDecl ::= '&lt;?xml' VersionInfo? EncodingDecl S? '?>'
       * </pre>
       * <p>
       * Problems found while scanning are reported through
       * <code>err.jspError</code> / <code>reportFatalError</code>.
       * <p>
       * <strong>Note:</strong> This method uses fString, anything in it
       * at the time of calling is lost.
       *
       * @param scanningTextDecl True if a text declaration is to
       *                         be scanned instead of an XML
       *                         declaration.
       * @param pseudoAttributeValues An array of size 3 to return the version,
       *                         encoding and standalone pseudo attribute values
       *                         (in that order). A value that was not
       *                         scanned is returned as null.
       */
      private void scanXMLDeclOrTextDecl(boolean scanningTextDecl,
  				       String[] pseudoAttributeValues) 
                  throws IOException, JasperException {
  
          // pseudo-attribute values
          String version = null;
          String encoding = null;
          String standalone = null;
  
          // scan pseudo-attributes; the state names the pseudo-attribute
          // that is expected next
          final int STATE_VERSION = 0;
          final int STATE_ENCODING = 1;
          final int STATE_STANDALONE = 2;
          final int STATE_DONE = 3;
          int state = STATE_VERSION;
  
          boolean dataFoundForTarget = false;
          boolean sawSpace = skipSpaces();
          // keep scanning until the '?' of the closing "?>" is next
          while (peekChar() != '?') {
              dataFoundForTarget = true;
              String name = scanPseudoAttribute(scanningTextDecl, fString);
              // NOTE(review): names are compared by reference below; this
              // presumably relies on scanName() returning symbols interned in
              // the same table as fVersionSymbol etc. -- confirm
              switch (state) {
                  case STATE_VERSION: {
                      if (name == fVersionSymbol) {
                          if (!sawSpace) {
                              reportFatalError(scanningTextDecl
                                         ? "jsp.error.xml.spaceRequiredBeforeVersionInTextDecl"
                                         : "jsp.error.xml.spaceRequiredBeforeVersionInXMLDecl",
                                               null);
                          }
                          version = fString.toString();
                          state = STATE_ENCODING;
                          if (!version.equals("1.0")) {
                              // REVISIT: XML REC says we should throw an error
  			    // in such cases.
                              // some may object the throwing of fatalError.
                              err.jspError("jsp.error.xml.versionNotSupported",
  					 version);
                          }
                      } else if (name == fEncodingSymbol) {
                          // encoding without a preceding version is only
                          // acceptable in a text declaration
                          if (!scanningTextDecl) {
                              err.jspError("jsp.error.xml.versionInfoRequired");
                          }
                          if (!sawSpace) {
                              reportFatalError(scanningTextDecl
                                        ? "jsp.error.xml.spaceRequiredBeforeEncodingInTextDecl"
                                        : "jsp.error.xml.spaceRequiredBeforeEncodingInXMLDecl",
                                               null);
                          }
                          encoding = fString.toString();
                          state = scanningTextDecl ? STATE_DONE : STATE_STANDALONE;
                      } else {
                          if (scanningTextDecl) {
                              err.jspError("jsp.error.xml.encodingDeclRequired");
                          }
                          else {
                              err.jspError("jsp.error.xml.versionInfoRequired");
                          }
                      }
                      break;
                  }
                  case STATE_ENCODING: {
                      if (name == fEncodingSymbol) {
                          if (!sawSpace) {
                              reportFatalError(scanningTextDecl
                                        ? "jsp.error.xml.spaceRequiredBeforeEncodingInTextDecl"
                                        : "jsp.error.xml.spaceRequiredBeforeEncodingInXMLDecl",
                                               null);
                          }
                          encoding = fString.toString();
                          state = scanningTextDecl ? STATE_DONE : STATE_STANDALONE;
                          // TODO: check encoding name; set encoding on
                          //       entity scanner
                      } else if (!scanningTextDecl && name == fStandaloneSymbol) {
                          // XML declaration without an encoding: standalone
                          // may directly follow the version
                          if (!sawSpace) {
                              err.jspError("jsp.error.xml.spaceRequiredBeforeStandalone");
                          }
                          standalone = fString.toString();
                          state = STATE_DONE;
                          if (!standalone.equals("yes") && !standalone.equals("no")) {
                              err.jspError("jsp.error.xml.sdDeclInvalid");
                          }
                      } else {
                          err.jspError("jsp.error.xml.encodingDeclRequired");
                      }
                      break;
                  }
                  case STATE_STANDALONE: {
                      if (name == fStandaloneSymbol) {
                          if (!sawSpace) {
                              err.jspError("jsp.error.xml.spaceRequiredBeforeStandalone");
                          }
                          standalone = fString.toString();
                          state = STATE_DONE;
                          if (!standalone.equals("yes") && !standalone.equals("no")) {
                              err.jspError("jsp.error.xml.sdDeclInvalid");
                          }
                      } else {
  			err.jspError("jsp.error.xml.encodingDeclRequired");
                      }
                      break;
                  }
                  // only STATE_DONE gets here: a pseudo-attribute was found
                  // although none is expected any more
                  default: {
                      err.jspError("jsp.error.xml.noMorePseudoAttributes");
                  }
              }
              // white space seen here precedes the next pseudo-attribute
              sawSpace = skipSpaces();
          }
          // REVISIT: should we remove this error reporting?
          if (scanningTextDecl && state != STATE_DONE) {
              err.jspError("jsp.error.xml.morePseudoAttributes");
          }
          
          // If there is no data in the xml or text decl then we fail to report
  	// error for version or encoding info above.
          if (scanningTextDecl) {
              if (!dataFoundForTarget && encoding == null) {
                  err.jspError("jsp.error.xml.encodingDeclRequired");
              }
          } else {
              if (!dataFoundForTarget && version == null) {
                  err.jspError("jsp.error.xml.versionInfoRequired");
              }
          }
  
          // end: the declaration must be closed by "?>"
          if (!skipChar('?')) {
              err.jspError("jsp.error.xml.xmlDeclUnterminated");
          }
          if (!skipChar('>')) {
              err.jspError("jsp.error.xml.xmlDeclUnterminated");
  
          }
          
          // fill in return array
          pseudoAttributeValues[0] = version;
          pseudoAttributeValues[1] = encoding;
          pseudoAttributeValues[2] = standalone;
      }
  
      // Adapted from:
      // org.apache.xerces.impl.XMLScanner.scanPseudoAttribute
      /**
       * Scans one pseudo attribute of the form <code>name = "value"</code>
       * (single or double quotes) and stores its value in the given string.
       *
       * @param scanningTextDecl True if scanning this pseudo-attribute for a
       *                         TextDecl; false if scanning XMLDecl. This 
       *                         flag is needed to report the correct type of
       *                         error.
       * @param value            The string to fill in with the attribute 
       *                         value.
       *
       * @return The name of the attribute
       *
       * <strong>Note:</strong> This method uses fStringBuffer2, anything in it
       * at the time of calling is lost.
       */
      public String scanPseudoAttribute(boolean scanningTextDecl, 
                                        XMLString value) 
                  throws IOException, JasperException {

          // attribute name
          final String attrName = scanName();
          if (attrName == null) {
              err.jspError("jsp.error.xml.pseudoAttrNameExpected");
          }

          // '=' with optional white space on either side
          skipSpaces();
          if (!skipChar('=')) {
              String eqKey;
              if (scanningTextDecl) {
                  eqKey = "jsp.error.xml.eqRequiredInTextDecl";
              } else {
                  eqKey = "jsp.error.xml.eqRequiredInXMLDecl";
              }
              reportFatalError(eqKey, attrName);
          }
          skipSpaces();

          // opening quote
          final int quote = peekChar();
          if (!(quote == '\'' || quote == '"')) {
              String quoteKey;
              if (scanningTextDecl) {
                  quoteKey = "jsp.error.xml.quoteRequiredInTextDecl";
              } else {
                  quoteKey = "jsp.error.xml.quoteRequiredInXMLDecl";
              }
              reportFatalError(quoteKey, attrName);
          }
          scanChar();

          // value: the common case is a literal that runs straight up to
          // the closing quote; otherwise the pieces are accumulated in
          // fStringBuffer2
          int stop = scanLiteral(quote, value);
          if (stop != quote) {
              fStringBuffer2.clear();
              while (stop != quote) {
                  fStringBuffer2.append(value);
                  switch (stop) {
                      case -1:
                          // nothing to examine
                          break;
                      case '&':
                      case '%':
                      case '<':
                      case ']':
                          // these characters are simply part of the value
                          fStringBuffer2.append((char)scanChar());
                          break;
                      default:
                          if (XMLChar.isHighSurrogate(stop)) {
                              scanSurrogates(fStringBuffer2);
                          }
                          else if (XMLChar.isInvalid(stop)) {
                              String invalidKey;
                              if (scanningTextDecl) {
                                  invalidKey = "jsp.error.xml.invalidCharInTextDecl";
                              } else {
                                  invalidKey = "jsp.error.xml.invalidCharInXMLDecl";
                              }
                              reportFatalError(invalidKey,
                                               Integer.toString(stop, 16));
                              scanChar();
                          }
                          break;
                  }
                  stop = scanLiteral(quote, value);
              }
              fStringBuffer2.append(value);
              value.setValues(fStringBuffer2);
          }

          // closing quote
          if (!skipChar(quote)) {
              String closeKey;
              if (scanningTextDecl) {
                  closeKey = "jsp.error.xml.closeQuoteMissingInTextDecl";
              } else {
                  closeKey = "jsp.error.xml.closeQuoteMissingInXMLDecl";
              }
              reportFatalError(closeKey, attrName);
          }

          return attrName;
      }
      
      // Adapted from:
      // org.apache.xerces.impl.XMLScanner.scanPIData
      /**
       * Scans the data of a processing instruction. This is needed to
       * handle the situation where a document starts with a processing
       * instruction whose target name <em>starts with</em> "xml"
       * (e.g. xmlfoo).
       *
       * <strong>Note:</strong> This method uses fStringBuffer, anything in it
       * at the time of calling is lost.
       *
       * @param target The PI target
       * @param data The string to fill in with the data
       */
      private void scanPIData(String target, XMLString data) 
          throws IOException, JasperException {

          // a target spelled "xml" in any letter case is reserved
          if (target.length() == 3
                  && Character.toLowerCase(target.charAt(0)) == 'x'
                  && Character.toLowerCase(target.charAt(1)) == 'm'
                  && Character.toLowerCase(target.charAt(2)) == 'l') {
              err.jspError("jsp.error.xml.reservedPITarget");
          }

          // white space must separate the target from any data
          if (!skipSpaces()) {
              if (skipString("?>")) {
                  // we found the end, there is no data
                  data.clear();
                  return;
              }
              // if there is data there should be some space
              err.jspError("jsp.error.xml.spaceRequiredInPI");
          }

          // data: scanData() is called again for as long as it returns true;
          // in between, the character it stopped at is examined
          fStringBuffer.clear();
          while (scanData("?>", fStringBuffer)) {
              int next = peekChar();
              if (next == -1) {
                  continue;
              }
              if (XMLChar.isHighSurrogate(next)) {
                  scanSurrogates(fStringBuffer);
              } else if (XMLChar.isInvalid(next)) {
                  err.jspError("jsp.error.xml.invalidCharInPI",
                               Integer.toHexString(next));
                  scanChar();
              }
          }
          data.setValues(fStringBuffer);
      }
  
      // Adapted from:
      // org.apache.xerces.impl.XMLScanner.scanSurrogates
      /**
       * Scans a surrogate pair and appends it to the specified buffer.
       * <p>
       * <strong>Note:</strong> This assumes the current char has already been
       * identified as a high surrogate.
       *
       * @param buf The buffer to append the scanned surrogates to.
       * @return True if both surrogates were scanned and appended; false if
       *         the high surrogate was not followed by a low surrogate or
       *         the resulting supplemental character is not a valid XML
       *         character (an error is reported in both cases).
       */
      private boolean scanSurrogates(XMLStringBuffer buf)
          throws IOException, JasperException {

          final int highSurrogate = scanChar();
          final int lowSurrogate = peekChar();

          if (XMLChar.isLowSurrogate(lowSurrogate)) {
              // consume the low surrogate that was only peeked at so far
              scanChar();

              // convert surrogates to supplemental character, which must
              // be a valid XML character
              int supplemental = XMLChar.supplemental((char)highSurrogate,
                                                      (char)lowSurrogate);
              if (XMLChar.isValid(supplemental)) {
                  // fill in the buffer
                  buf.append((char)highSurrogate);
                  buf.append((char)lowSurrogate);
                  return true;
              }

              err.jspError("jsp.error.xml.invalidCharInContent",
                           Integer.toString(supplemental, 16));
              return false;
          }

          // unpaired high surrogate
          err.jspError("jsp.error.xml.invalidCharInContent",
                       Integer.toString(highSurrogate, 16));
          return false;
      }
  
      // Adapted from:
      // org.apache.xerces.impl.XMLScanner.reportFatalError
      /**
       * Convenience function used by the XML scanning methods: forwards the
       * given error code and argument to the error dispatcher.
       *
       * @param msgId The error code to report
       * @param arg   The argument passed along with the error code
       */
      private void reportFatalError(String msgId, String arg) throws JasperException {
          err.jspError(msgId, arg);
      }
  
  }
  
  
  
  
  

--
To unsubscribe, e-mail:   <ma...@jakarta.apache.org>
For additional commands, e-mail: <ma...@jakarta.apache.org>

Re: cvs commit: jakarta-tomcat-jasper/jasper2/src/share/org/apache/jasper/xmlparser ASCIIReader.java UCSReader.java UTF8Reader.java XMLEncodingDetector.java

Posted by Remy Maucherat <re...@apache.org>.

luehe@apache.org wrote:

> luehe       2002/11/06 12:14:20
>
>   Modified:    jasper2/src/share/org/apache/jasper/compiler
>                         ErrorDispatcher.java JspReader.java JspUtil.java
>                         PageDataImpl.java PageInfo.java
>                         ParserController.java Validator.java
>                jasper2/src/share/org/apache/jasper/resources
>                         messages.properties messages_es.properties
>                         messages_ja.properties
>   Added:       jasper2/src/share/org/apache/jasper/xmlparser
>                         ASCIIReader.java UCSReader.java UTF8Reader.java
>                         XMLEncodingDetector.java
>   Log:
>   First cut at I18N changes.
>   

There are problems with that patch:

- What does it do?

- It seems like a big change; was it discussed on the list beforehand?

- Trying to access the admin webapp:
java.lang.StackOverflowError
	at org.apache.jasper.xmlparser.UTF8Reader.read(UTF8Reader.java:293)
	at org.apache.jasper.xmlparser.XMLEncodingDetector.load(XMLEncodingDetector.java:1041)
	at org.apache.jasper.xmlparser.XMLEncodingDetector.load(XMLEncodingDetector.java:1065)
	at org.apache.jasper.xmlparser.XMLEncodingDetector.load(XMLEncodingDetector.java:1065)
(you get the idea)

Thanks,
Remy


--
To unsubscribe, e-mail:   <ma...@jakarta.apache.org>
For additional commands, e-mail: <ma...@jakarta.apache.org>