You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@tomcat.apache.org by lu...@apache.org on 2002/11/06 21:14:20 UTC
cvs commit: jakarta-tomcat-jasper/jasper2/src/share/org/apache/jasper/xmlparser ASCIIReader.java UCSReader.java UTF8Reader.java XMLEncodingDetector.java
luehe 2002/11/06 12:14:20
Modified: jasper2/src/share/org/apache/jasper/compiler
ErrorDispatcher.java JspReader.java JspUtil.java
PageDataImpl.java PageInfo.java
ParserController.java Validator.java
jasper2/src/share/org/apache/jasper/resources
messages.properties messages_es.properties
messages_ja.properties
Added: jasper2/src/share/org/apache/jasper/xmlparser
ASCIIReader.java UCSReader.java UTF8Reader.java
XMLEncodingDetector.java
Log:
First cut at I18N changes.
Revision Changes Path
1.7 +22 -24 jakarta-tomcat-jasper/jasper2/src/share/org/apache/jasper/compiler/ErrorDispatcher.java
Index: ErrorDispatcher.java
===================================================================
RCS file: /home/cvs/jakarta-tomcat-jasper/jasper2/src/share/org/apache/jasper/compiler/ErrorDispatcher.java,v
retrieving revision 1.6
retrieving revision 1.7
diff -u -r1.6 -r1.7
--- ErrorDispatcher.java 4 Nov 2002 20:18:24 -0000 1.6
+++ ErrorDispatcher.java 6 Nov 2002 20:14:19 -0000 1.7
@@ -96,9 +96,6 @@
errHandler = new DefaultErrorHandler(this);
}
- //*********************************************************************
- // Package-scoped utility methods
-
/*
* Dispatches the given JSP parse error to the configured error handler.
*
@@ -108,7 +105,7 @@
*
* @param errCode Error code
*/
- void jspError(String errCode) throws JasperException {
+ public void jspError(String errCode) throws JasperException {
dispatch(null, errCode, null, null);
}
@@ -122,7 +119,7 @@
* @param where Error location
* @param errCode Error code
*/
- void jspError(Mark where, String errCode) throws JasperException {
+ public void jspError(Mark where, String errCode) throws JasperException {
dispatch(where, errCode, null, null);
}
@@ -136,7 +133,7 @@
* @param n Node that caused the error
* @param errCode Error code
*/
- void jspError(Node n, String errCode) throws JasperException {
+ public void jspError(Node n, String errCode) throws JasperException {
dispatch(n.getStart(), errCode, null, null);
}
@@ -150,7 +147,7 @@
* @param errCode Error code
* @param arg Argument for parametric replacement
*/
- void jspError(String errCode, String arg) throws JasperException {
+ public void jspError(String errCode, String arg) throws JasperException {
dispatch(null, errCode, new Object[] {arg}, null);
}
@@ -165,7 +162,7 @@
* @param errCode Error code
* @param arg Argument for parametric replacement
*/
- void jspError(Mark where, String errCode, String arg)
+ public void jspError(Mark where, String errCode, String arg)
throws JasperException {
dispatch(where, errCode, new Object[] {arg}, null);
}
@@ -181,7 +178,7 @@
* @param errCode Error code
* @param arg Argument for parametric replacement
*/
- void jspError(Node n, String errCode, String arg)
+ public void jspError(Node n, String errCode, String arg)
throws JasperException {
dispatch(n.getStart(), errCode, new Object[] {arg}, null);
}
@@ -197,7 +194,7 @@
* @param arg1 First argument for parametric replacement
* @param arg2 Second argument for parametric replacement
*/
- void jspError(String errCode, String arg1, String arg2)
+ public void jspError(String errCode, String arg1, String arg2)
throws JasperException {
dispatch(null, errCode, new Object[] {arg1, arg2}, null);
}
@@ -214,7 +211,7 @@
* @param arg1 First argument for parametric replacement
* @param arg2 Second argument for parametric replacement
*/
- void jspError(Mark where, String errCode, String arg1, String arg2)
+ public void jspError(Mark where, String errCode, String arg1, String arg2)
throws JasperException {
dispatch(where, errCode, new Object[] {arg1, arg2}, null);
}
@@ -231,7 +228,7 @@
* @param arg1 First argument for parametric replacement
* @param arg2 Second argument for parametric replacement
*/
- void jspError(Node n, String errCode, String arg1, String arg2)
+ public void jspError(Node n, String errCode, String arg1, String arg2)
throws JasperException {
dispatch(n.getStart(), errCode, new Object[] {arg1, arg2}, null);
}
@@ -241,7 +238,7 @@
*
* @param e Parsing exception
*/
- void jspError(Exception e) throws JasperException {
+ public void jspError(Exception e) throws JasperException {
dispatch(null, null, null, e);
}
@@ -256,7 +253,7 @@
* @param arg Argument for parametric replacement
* @param e Parsing exception
*/
- void jspError(String errCode, String arg, Exception e)
+ public void jspError(String errCode, String arg, Exception e)
throws JasperException {
dispatch(null, errCode, new Object[] {arg}, e);
}
@@ -273,7 +270,7 @@
* @param arg Argument for parametric replacement
* @param e Parsing exception
*/
- void jspError(Node n, String errCode, String arg, Exception e)
+ public void jspError(Node n, String errCode, String arg, Exception e)
throws JasperException {
dispatch(n.getStart(), errCode, new Object[] {arg}, e);
}
@@ -287,7 +284,7 @@
* @param page Node representation of JSP page from which the Java source
* file was generated
*/
- void javacError(String errMsg, String fname, Node.Nodes page)
+ public void javacError(String errMsg, String fname, Node.Nodes page)
throws JasperException, IOException {
JavacErrorDetail[] errDetails = parseJavacMessage(errMsg, fname, page);
errHandler.javacError(errDetails);
@@ -304,7 +301,7 @@
*
* @return Localized error message
*/
- String getString(String errCode) {
+ public String getString(String errCode) {
String errMsg = errCode;
try {
errMsg = bundle.getString(errCode);
@@ -325,7 +322,7 @@
*
* @return Localized error message
*/
- String getString(String errCode, String arg) {
+ public String getString(String errCode, String arg) {
return getString(errCode, new Object[] {arg});
}
@@ -342,7 +339,7 @@
*
* @return Localized error message
*/
- String getString(String errCode, String arg1, String arg2) {
+ public String getString(String errCode, String arg1, String arg2) {
return getString(errCode, new Object[] {arg1, arg2});
}
@@ -360,7 +357,8 @@
*
* @return Localized error message
*/
- String getString(String errCode, String arg1, String arg2, String arg3) {
+ public String getString(String errCode, String arg1, String arg2,
+ String arg3) {
return getString(errCode, new Object[] {arg1, arg2, arg3});
}
@@ -376,7 +374,7 @@
*
* @return Localized error message
*/
- String getString(String errCode, Object[] args) {
+ public String getString(String errCode, Object[] args) {
String errMsg = errCode;
try {
errMsg = bundle.getString(errCode);
1.11 +25 -7 jakarta-tomcat-jasper/jasper2/src/share/org/apache/jasper/compiler/JspReader.java
Index: JspReader.java
===================================================================
RCS file: /home/cvs/jakarta-tomcat-jasper/jasper2/src/share/org/apache/jasper/compiler/JspReader.java,v
retrieving revision 1.10
retrieving revision 1.11
diff -u -r1.10 -r1.11
--- JspReader.java 10 Oct 2002 00:49:21 -0000 1.10
+++ JspReader.java 6 Nov 2002 20:14:19 -0000 1.11
@@ -57,6 +57,7 @@
import java.io.*;
import java.util.*;
+import java.util.jar.JarFile;
import org.apache.jasper.Constants;
import org.apache.jasper.JasperException;
import org.apache.jasper.JspCompilationContext;
@@ -94,29 +95,44 @@
/*
* Set to true when using the JspReader on a single file where we read up
* to the end and reset to the beginning many times.
- * (as in ParserCtl.figureOutJspDocument().
+ * (as in ParserController.figureOutJspDocument()).
*/
private boolean singleFile;
/*
* Constructor.
*/
- public JspReader(JspCompilationContext ctx,
- String file,
+ public JspReader(JspCompilationContext ctxt,
+ String fname,
+ String encoding,
+ JarFile jarFile,
+ ErrorDispatcher err)
+ throws JasperException, FileNotFoundException, IOException {
+
+ this(ctxt, fname, encoding,
+ JspUtil.getReader(fname, encoding, jarFile, ctxt, err),
+ err);
+ }
+
+ /*
+ * Constructor.
+ */
+ public JspReader(JspCompilationContext ctxt,
+ String fname,
String encoding,
InputStreamReader reader,
ErrorDispatcher err)
throws JasperException, FileNotFoundException {
- this.context = ctx;
+ this.context = ctxt;
this.err = err;
sourceFiles = new Vector();
currFileId = 0;
size = 0;
singleFile = false;
- loghelper = new Logger.Helper("JASPER_LOG", "JspReader");
- pushFile2(file, encoding, reader);
+ loghelper = new Logger.Helper("JASPER_LOG", "JspReader");
+ pushFile2(fname, encoding, reader);
}
String getFile(int fileid) {
@@ -553,7 +569,9 @@
err.jspError("jsp.error.file.cannot.read", "ze file");
} finally {
if (reader != null) {
- try { reader.close(); } catch (Exception any) {}
+ try {
+ reader.close();
+ } catch (Exception any) {}
}
}
}
1.21 +52 -8 jakarta-tomcat-jasper/jasper2/src/share/org/apache/jasper/compiler/JspUtil.java
Index: JspUtil.java
===================================================================
RCS file: /home/cvs/jakarta-tomcat-jasper/jasper2/src/share/org/apache/jasper/compiler/JspUtil.java,v
retrieving revision 1.20
retrieving revision 1.21
diff -u -r1.20 -r1.21
--- JspUtil.java 17 Oct 2002 20:43:06 -0000 1.20
+++ JspUtil.java 6 Nov 2002 20:14:19 -0000 1.21
@@ -62,18 +62,18 @@
import java.net.URL;
-import java.io.CharArrayWriter;
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.FileInputStream;
+import java.io.*;
import java.util.ArrayList;
import java.util.Enumeration;
import java.util.Hashtable;
import java.util.NoSuchElementException;
import java.util.Vector;
import java.util.StringTokenizer;
+import java.util.jar.JarFile;
+import java.util.zip.ZipEntry;
import org.apache.jasper.Constants;
+import org.apache.jasper.JspCompilationContext;
import org.apache.jasper.JasperException;
import org.apache.jasper.logging.Logger;
@@ -97,7 +97,7 @@
* @author Shawn Bayern
* @author Mark Roth
*/
-class JspUtil {
+public class JspUtil {
// Delimiters for request-time expressions (JSP and XML syntax)
private static final String OPEN_EXPR = "<%=";
@@ -716,6 +716,50 @@
public Class[] getParameterTypes() {
return this.parameterTypes;
}
+ }
+
+ public static InputStream getInputStream(String fname, JarFile jarFile,
+ JspCompilationContext ctxt,
+ ErrorDispatcher err)
+ throws JasperException, IOException {
+
+ InputStream in = null;
+
+ if (jarFile != null) {
+ String jarEntryName = fname.substring(1, fname.length());
+ ZipEntry jarEntry = jarFile.getEntry(jarEntryName);
+ if (jarEntry == null) {
+ err.jspError("jsp.error.file.not.found", fname);
+ }
+ in = jarFile.getInputStream(jarEntry);
+ } else {
+ in = ctxt.getResourceAsStream(fname);
+ }
+
+ if (in == null) {
+ err.jspError("jsp.error.file.not.found", fname);
+ }
+
+ return in;
+ }
+
+
+ static InputStreamReader getReader(String fname, String encoding,
+ JarFile jarFile,
+ JspCompilationContext ctxt,
+ ErrorDispatcher err)
+ throws JasperException, IOException {
+
+ InputStreamReader reader = null;
+ InputStream in = getInputStream(fname, jarFile, ctxt, err);
+
+ try {
+ reader = new InputStreamReader(in, encoding);
+ } catch (UnsupportedEncodingException ex) {
+ err.jspError("jsp.error.unsupported.encoding", encoding);
+ }
+
+ return reader;
}
}
1.15 +19 -6 jakarta-tomcat-jasper/jasper2/src/share/org/apache/jasper/compiler/PageDataImpl.java
Index: PageDataImpl.java
===================================================================
RCS file: /home/cvs/jakarta-tomcat-jasper/jasper2/src/share/org/apache/jasper/compiler/PageDataImpl.java,v
retrieving revision 1.14
retrieving revision 1.15
diff -u -r1.14 -r1.15
--- PageDataImpl.java 30 Oct 2002 18:20:21 -0000 1.14
+++ PageDataImpl.java 6 Nov 2002 20:14:19 -0000 1.15
@@ -120,7 +120,8 @@
*
* @param page the page nodes from which to generate the XML view
*/
- public PageDataImpl(Node.Nodes page) throws JasperException {
+ public PageDataImpl(Node.Nodes page, PageInfo pageInfo)
+ throws JasperException {
// First pass
FirstPassVisitor firstPassVisitor
@@ -130,7 +131,7 @@
// Second pass
buf = new StringBuffer();
SecondPassVisitor secondPassVisitor
- = new SecondPassVisitor(page.getRoot(), buf);
+ = new SecondPassVisitor(page.getRoot(), buf, pageInfo);
page.visit(secondPassVisitor);
}
@@ -236,6 +237,7 @@
private Node.Root root;
private StringBuffer buf;
+ private PageInfo pageInfo;
// current jsp:id attribute value
private int jspId;
@@ -243,9 +245,11 @@
/*
* Constructor
*/
- public SecondPassVisitor(Node.Root root, StringBuffer buf) {
+ public SecondPassVisitor(Node.Root root, StringBuffer buf,
+ PageInfo pageInfo) {
this.root = root;
this.buf = buf;
+ this.pageInfo = pageInfo;
}
/*
@@ -254,6 +258,7 @@
public void visit(Node.Root n) throws JasperException {
if (n == this.root) {
// top-level page
+ appendXmlProlog();
appendTag(JSP_ROOT, n.getAttributes(), n.getBody(), null);
} else {
visitBody(n);
@@ -269,6 +274,7 @@
public void visit(Node.JspRoot n) throws JasperException {
if (n == this.root) {
// top-level jsp:root element
+ appendXmlProlog();
appendTag(JSP_ROOT, n.getAttributes(), n.getBody(), null);
} else {
visitBody(n);
@@ -525,6 +531,13 @@
buf.append(" ").append(name).append("=\"");
buf.append(JspUtil.getExprInXml(value)).append("\"\n");
}
+ }
+
+ /*
+ * Appends XML prolog with encoding declaration.
+ */
+ private void appendXmlProlog() {
+ buf.append("<?xml version=\"1.0\" encoding=\"UTF-8\" ?>");
}
}
}
1.14 +38 -7 jakarta-tomcat-jasper/jasper2/src/share/org/apache/jasper/compiler/PageInfo.java
Index: PageInfo.java
===================================================================
RCS file: /home/cvs/jakarta-tomcat-jasper/jasper2/src/share/org/apache/jasper/compiler/PageInfo.java,v
retrieving revision 1.13
retrieving revision 1.14
diff -u -r1.13 -r1.14
--- PageInfo.java 1 Nov 2002 02:54:41 -0000 1.13
+++ PageInfo.java 6 Nov 2002 20:14:19 -0000 1.14
@@ -65,7 +65,7 @@
import org.apache.jasper.Constants;
/**
- * A repository for various info about the page under compilation
+ * A repository for various info about the translation unit under compilation.
*
* @author Kin-man Chung
*/
@@ -88,15 +88,30 @@
private boolean isErrorPage = false;
private String errorPage = null;
private String pageEncoding = null;
+
+ /*
+ * Auto-detected encoding, or encoding specified in XML prolog
+ * (declaration).
+ * Only meaningful for XML documents.
+ */
+ private String xmlEncoding = null;
+
+ // Indicates whether page has XML declaration with encoding attribute
+ private boolean hasEncodingProlog = false;
+
private int maxTagNesting = 0;
private boolean scriptless = false;
private boolean scriptingInvalid = false;
private boolean elIgnored = false;
private boolean elIgnoredSpecified = false;
private boolean isXml = false;
- private boolean isXmlSpecified = false; // true is there is a is-xml
- // element in jsp-config
- private boolean hasTagFile = false; // A custom tag is a tag file
+
+ // true if there is an is-xml element in jsp-config
+ private boolean isXmlSpecified = false;
+
+ // A custom tag is a tag file
+ private boolean hasTagFile = false;
+
private boolean hasJspRoot = false;
private Vector includePrelude;
private Vector includeCoda;
@@ -218,6 +233,22 @@
public String getPageEncoding() {
return pageEncoding;
+ }
+
+ public void setXmlEncoding(String xmlEncoding) {
+ this.xmlEncoding = xmlEncoding;
+ }
+
+ public String getXmlEncoding() {
+ return xmlEncoding;
+ }
+
+ public void setHasEncodingProlog(boolean hasEncodingProlog) {
+ this.hasEncodingProlog = hasEncodingProlog;
+ }
+
+ public boolean hasEncodingProlog() {
+ return hasEncodingProlog;
}
public int getMaxTagNesting() {
1.24 +112 -113 jakarta-tomcat-jasper/jasper2/src/share/org/apache/jasper/compiler/ParserController.java
Index: ParserController.java
===================================================================
RCS file: /home/cvs/jakarta-tomcat-jasper/jasper2/src/share/org/apache/jasper/compiler/ParserController.java,v
retrieving revision 1.23
retrieving revision 1.24
diff -u -r1.23 -r1.24
--- ParserController.java 28 Oct 2002 23:21:08 -0000 1.23
+++ ParserController.java 6 Nov 2002 20:14:19 -0000 1.24
@@ -63,6 +63,7 @@
import org.xml.sax.Attributes;
import org.apache.jasper.*;
import org.apache.jasper.logging.Logger;
+import org.apache.jasper.xmlparser.XMLEncodingDetector;
/**
* Controller for the parsing of a JSP page.
@@ -75,30 +76,34 @@
* the proper parser.
*
* @author Pierre Delisle
+ * @author Jan Luehe
*/
class ParserController {
+ private static final String CHARSET = "charset=";
+
private JspCompilationContext ctxt;
private Compiler compiler;
+ private PageInfo pageInfo;
private ErrorDispatcher err;
/*
- * A stack to keep track of the 'current base directory'
- * for include directives that refer to relative paths.
- */
- private Stack baseDirStack = new Stack();
-
- /*
* Document information which tells us what
* kind of document we are dealing with.
*/
private boolean isXml;
+
+ /*
+ * A stack to keep track of the 'current base directory'
+ * for include directives that refer to relative paths.
+ */
+ private Stack baseDirStack = new Stack();
/*
* Static information used in the process of figuring out
* the kind of document we're dealing with.
*/
- private static final String JSP_ROOT_TAG = "<jsp:root";
+ private static final String JSP_ROOT_TAG = "<jsp:root";
/*
* Tells if the file being processed is the "top" file
@@ -107,24 +112,12 @@
private boolean isTopFile = true;
/*
- * The encoding of the "top" file. This encoding is used
- * for included files by default.
- * Defaults to "ISO-8859-1" per JSP spec.
- */
- private String topFileEncoding = "ISO-8859-1";
-
- /*
- * The 'new' encoding required to read a page.
- */
- private String newEncoding;
-
-
- /*
* Constructor
*/
public ParserController(JspCompilationContext ctxt, Compiler compiler) {
this.ctxt = ctxt; // @@@ can we assert that ctxt is not null?
this.compiler = compiler;
+ this.pageInfo = compiler.getPageInfo();
this.err = compiler.getErrorDispatcher();
}
@@ -187,34 +180,29 @@
throws FileNotFoundException, JasperException, IOException {
Node.Nodes parsedPage = null;
- String encoding = topFileEncoding;
InputStreamReader reader = null;
String absFileName = resolveFileName(inFileName);
JarFile jarFile = (JarFile) ctxt.getTagFileJars().get(inFileName);
try {
- // Figure out what type of JSP document we are dealing with
- reader = getReader(absFileName, encoding, jarFile);
- figureOutJspDocument(absFileName, encoding, reader);
- if (newEncoding != null)
- encoding = newEncoding;
+ // Figure out what type of JSP document and encoding type we are
+ // dealing with
+ String encoding = figureOutJspDocument(absFileName, jarFile);
+
if (isTopFile) {
- // Set the "top level" file encoding that will be used
- // for all included files where encoding is not defined.
- topFileEncoding = encoding;
+ pageInfo.setIsXml(isXml);
+ if (isXml) {
+ pageInfo.setXmlEncoding(encoding);
+ }
isTopFile = false;
} else {
- compiler.getPageInfo().addDependant(absFileName);
- }
- try {
- reader.close();
- } catch (IOException ex) {
+ compiler.getPageInfo().addDependant(absFileName);
}
// dispatch to the proper parser
-
- reader = getReader(absFileName, encoding, jarFile);
+ reader = JspUtil.getReader(absFileName, encoding, jarFile, ctxt,
+ err);
if (isXml) {
parsedPage = JspDocumentParser.parse(this, absFileName,
reader, parent,
@@ -240,40 +228,68 @@
}
/**
- * Discover the properties of the page by scanning it.
- * Properties to find out are:
- * - Is it in XML syntax?
- * - What is the the page encoding
+ * Determines the properties of the given page or tag file.
+ * The properties to be determined are:
+ *
+ * - Syntax (JSP or XML).
+ * This information is supplied by setting the instance variable
+ * 'isXml'.
+ *
+ * - Source Encoding.
+ * This information is supplied as the return value.
+ *
* If these properties are already specified in the jsp-config element
* in web.xml, then they are used.
+ *
+ * @return The source encoding
*/
- private void figureOutJspDocument(String file,
- String encoding,
- InputStreamReader reader)
- throws JasperException
- {
- newEncoding = null;
- PageInfo pageInfo = compiler.getPageInfo();
+ private String figureOutJspDocument(String fname, JarFile jarFile)
+ throws JasperException, IOException {
+
boolean isXmlFound = false;
+ isXml = false;
+
if (pageInfo.isXmlSpecified()) {
// If <is-xml> is specified in a <jsp-property-group>, it is used.
isXml = pageInfo.isXml();
isXmlFound = true;
- } else if (file.endsWith(".jspx")) {
+ } else if (fname.endsWith(".jspx")) {
isXml = true;
isXmlFound = true;
}
- if (pageInfo.getPageEncoding() != null) {
- newEncoding = pageInfo.getPageEncoding();
+ String sourceEnc = null;
+ if (isXmlFound && !isXml) {
+ // JSP syntax
+ if (pageInfo.getPageEncoding() != null) {
+ // encoding specified in jsp-config (used only by JSP syntax)
+ return pageInfo.getPageEncoding();
+ } else {
+ // We don't know the encoding
+ sourceEnc = "ISO-8859-1";
+ }
+ } else {
+ // XML syntax or unknown, autodetect encoding ...
+ Object[] ret = XMLEncodingDetector.getEncoding(fname, jarFile,
+ ctxt, err);
+ sourceEnc = (String) ret[0];
+ boolean isEncodingSetInProlog = ((Boolean) ret[1]).booleanValue();
+ if (isTopFile) {
+ pageInfo.setHasEncodingProlog(isEncodingSetInProlog);
+ }
+ if (isEncodingSetInProlog) {
+ // Prolog present only in XML syntax
+ isXml = true;
+ }
}
- if (isXmlFound && newEncoding != null)
- return; // No need to scan the file
+ if (isXml) {
+ return sourceEnc;
+ }
- JspReader jspReader;
+ JspReader jspReader = null;
try {
- jspReader = new JspReader(ctxt, file, encoding, reader, err);
+ jspReader = new JspReader(ctxt, fname, sourceEnc, jarFile, err);
} catch (FileNotFoundException ex) {
throw new JasperException(ex);
}
@@ -288,47 +304,60 @@
Mark mark = jspReader.skipUntil(JSP_ROOT_TAG);
if (mark != null) {
isXml = true;
+ return sourceEnc;
} else {
isXml = false;
}
}
- if (newEncoding != null) {
- // encoding specified in jsp-config
- return;
+ // At this point we know it's JSP syntax ...
+ if (pageInfo.getPageEncoding() != null) {
+ return pageInfo.getPageEncoding();
+ } else {
+ return getSourceEncodingForJspSyntax(jspReader, startMark);
}
-
- // Figure out the encoding of the page
- // FIXME: We assume xml parser will take care of
- // encoding for page in XML syntax. Correct?
- if (!isXml) {
- jspReader.reset(startMark);
- while (jspReader.skipUntil("<%@") != null) {
+ }
+
+ /*
+ * Determines page source encoding for JSP page or tag file in JSP syntax
+ */
+ private String getSourceEncodingForJspSyntax(JspReader jspReader,
+ Mark startMark)
+ throws JasperException {
+
+ String encoding = null;
+
+ jspReader.reset(startMark);
+ while (jspReader.skipUntil("<%@") != null) {
+ jspReader.skipSpaces();
+ // compare for "tag ", so we don't match "taglib"
+ if (jspReader.matches("tag ") || jspReader.matches("page")) {
jspReader.skipSpaces();
- if (jspReader.matches( "tag " ) || jspReader.matches("page")) {
- jspReader.skipSpaces();
- Attributes attrs = Parser.parseAttributes(this, jspReader);
- String attribute = "pageEncoding";
- newEncoding = attrs.getValue("pageEncoding");
- if (newEncoding == null) {
- String contentType = attrs.getValue("contentType");
- if (contentType != null) {
- int loc = contentType.indexOf("charset=");
- if (loc != -1) {
- newEncoding = contentType.substring(loc+8);
- return;
- }
- }
- if (newEncoding == null)
- newEncoding = "ISO-8859-1";
- } else {
- return;
+ Attributes attrs = Parser.parseAttributes(this, jspReader);
+ encoding = attrs.getValue("pageEncoding");
+ if (encoding != null) {
+ break;
+ }
+ String contentType = attrs.getValue("contentType");
+ if (contentType != null) {
+ int loc = contentType.indexOf(CHARSET);
+ if (loc != -1) {
+ encoding = contentType.substring(loc
+ + CHARSET.length());
+ break;
}
}
}
}
+
+ if (encoding == null) {
+ // Default to "ISO-8859-1" per JSP spec
+ encoding = "ISO-8859-1";
+ }
+
+ return encoding;
}
-
+
/*
* Resolve the name of the file and update
* baseDirStack() to keep track ot the current
@@ -348,34 +377,4 @@
return fileName;
}
- private InputStreamReader getReader(String file, String encoding,
- JarFile jarFile)
- throws JasperException, IOException {
-
- InputStream in = null;
- InputStreamReader reader = null;
-
- if (jarFile != null) {
- String jarEntryName = file.substring(1, file.length());
- ZipEntry jarEntry = jarFile.getEntry(jarEntryName);
- if (jarEntry == null) {
- err.jspError("jsp.error.file.not.found", file);
- }
- in = jarFile.getInputStream(jarEntry);
- } else {
- in = ctxt.getResourceAsStream(file);
- }
-
- if (in == null) {
- err.jspError("jsp.error.file.not.found", file);
- }
-
- try {
- reader = new InputStreamReader(in, encoding);
- } catch (UnsupportedEncodingException ex) {
- err.jspError("jsp.error.unsupported.encoding", encoding);
- }
-
- return reader;
- }
}
1.50 +20 -10 jakarta-tomcat-jasper/jasper2/src/share/org/apache/jasper/compiler/Validator.java
Index: Validator.java
===================================================================
RCS file: /home/cvs/jakarta-tomcat-jasper/jasper2/src/share/org/apache/jasper/compiler/Validator.java,v
retrieving revision 1.49
retrieving revision 1.50
diff -u -r1.49 -r1.50
--- Validator.java 30 Oct 2002 17:41:22 -0000 1.49
+++ Validator.java 6 Nov 2002 20:14:19 -0000 1.50
@@ -1020,8 +1020,7 @@
*/
page.visit(new DirectiveVisitor(compiler));
- // Determine the default output content type, per errata_a
- // http://jcp.org/aboutJava/communityprocess/maintenance/jsr053/errata_1_2_a_20020321.html
+ // Determine the default output content type
PageInfo pageInfo = compiler.getPageInfo();
String contentType = pageInfo.getContentType();
if (!compiler.getCompilationContext().isTagFile() &&
@@ -1033,10 +1032,20 @@
} else {
defaultType = contentType;
}
- String charset = pageInfo.getPageEncoding();
- if (charset == null)
- charset = isXml? "UTF-8": "ISO-8859-1";
- pageInfo.setContentType(defaultType + ";charset=" + charset);
+
+ String charset = null;
+ if (isXml) {
+ charset = "UTF-8";
+ } else {
+ charset = pageInfo.getPageEncoding();
+ // The resulting charset might be null
+ }
+
+ if (charset != null) {
+ pageInfo.setContentType(defaultType + ";charset=" + charset);
+ } else {
+ pageInfo.setContentType(defaultType);
+ }
}
/*
@@ -1051,7 +1060,8 @@
* Invoke TagLibraryValidator classes of all imported tags
* (second validation step for custom tags according to JSP.10.5).
*/
- validateXmlView(new PageDataImpl(page), compiler);
+ validateXmlView(new PageDataImpl(page, compiler.getPageInfo()),
+ compiler);
/*
* Invoke TagExtraInfo method isValid() for all imported tags
1.55 +34 -1 jakarta-tomcat-jasper/jasper2/src/share/org/apache/jasper/resources/messages.properties
Index: messages.properties
===================================================================
RCS file: /home/cvs/jakarta-tomcat-jasper/jasper2/src/share/org/apache/jasper/resources/messages.properties,v
retrieving revision 1.54
retrieving revision 1.55
diff -u -r1.54 -r1.55
--- messages.properties 6 Nov 2002 11:38:35 -0000 1.54
+++ messages.properties 6 Nov 2002 20:14:20 -0000 1.55
@@ -305,3 +305,36 @@
jasper.error.emptybodycontent.nonempty=According to TLD, tag {0} must be empty, but is not
jsp.error.tagfile.var_name_given_equals_attr_name=In tag file {0}, the name-given attribute ({1}) of a variable directive equals the name attribute of an attribute directive
jsp.error.useBean.noSession=Illegal for useBean to use session scope when JSP page declares (via page directive) that it does not participate in sessions
+jsp.error.xml.encodingByteOrderUnsupported = Given byte order for encoding \"{0}\" is not supported.
+jsp.error.xml.encodingDeclInvalid = Invalid encoding name \"{0}\".
+jsp.error.xml.encodingDeclRequired = The encoding declaration is required in the text declaration.
+jsp.error.xml.morePseudoAttributes = more pseudo attributes are expected.
+jsp.error.xml.noMorePseudoAttributes = no more pseudo attributes are allowed.
+jsp.error.xml.versionInfoRequired = The version is required in the XML declaration.
+jsp.error.xml.xmlDeclUnterminated = The XML declaration must end with \"?>\".
+jsp.error.xml.reservedPITarget = The processing instruction target matching \"[xX][mM][lL]\" is not allowed.
+jsp.error.xml.spaceRequiredInPI = White space is required between the processing instruction target and data.
+jsp.error.xml.invalidCharInContent = An invalid XML character (Unicode: 0x{0}) was found in the element content of the document.
+jsp.error.xml.spaceRequiredBeforeStandalone = White space is required before the standalone pseudo attribute in the XML declaration.
+jsp.error.xml.sdDeclInvalid = The standalone document declaration value must be \"yes\" or \"no\", not \"{0}\".
+jsp.error.xml.invalidCharInPI = An invalid XML character (Unicode: 0x{0}) was found in the processing instruction.
+jsp.error.xml.versionNotSupported = XML version \"{0}\" is not supported, only XML 1.0 is supported.
+jsp.error.xml.pseudoAttrNameExpected = a pseudo attribute name is expected.
+jsp.error.xml.expectedByte = Expected byte {0} of {1}-byte UTF-8 sequence.
+jsp.error.xml.invalidByte = Invalid byte {0} of {1}-byte UTF-8 sequence.
+jsp.error.xml.operationNotSupported = Operation \"{0}\" not supported by {1} reader.
+jsp.error.xml.invalidHighSurrogate = High surrogate bits in UTF-8 sequence must not exceed 0x10 but found 0x{0}.
+jsp.error.xml.invalidASCII = Byte \"{0}\" not 7-bit ASCII.
+jsp.error.xml.spaceRequiredBeforeEncodingInXMLDecl = White space is required before the encoding pseudo attribute in the XML declaration.
+jsp.error.xml.spaceRequiredBeforeEncodingInTextDecl = White space is required before the encoding pseudo attribute in the text declaration.
+jsp.error.xml.spaceRequiredBeforeVersionInTextDecl = White space is required before the version pseudo attribute in the text declaration.
+jsp.error.xml.spaceRequiredBeforeVersionInXMLDecl = White space is required before the version pseudo attribute in the XML declaration.
+jsp.error.xml.eqRequiredInXMLDecl = The '' = '' character must follow \"{0}\" in the XML declaration.
+jsp.error.xml.eqRequiredInTextDecl = The '' = '' character must follow \"{0}\" in the text declaration.
+jsp.error.xml.quoteRequiredInTextDecl = The value following \"{0}\" in the text declaration must be a quoted string.
+jsp.error.xml.quoteRequiredInXMLDecl = The value following \"{0}\" in the XML declaration must be a quoted string.
+jsp.error.xml.invalidCharInTextDecl = An invalid XML character (Unicode: 0x{0}) was found in the text declaration.
+jsp.error.xml.invalidCharInXMLDecl = An invalid XML character (Unicode: 0x{0}) was found in the XML declaration.
+jsp.error.xml.closeQuoteMissingInTextDecl = closing quote in the value following \"{0}\" in the text declaration is missing.
+jsp.error.xml.closeQuoteMissingInXMLDecl = closing quote in the value following \"{0}\" in the XML declaration is missing.
+jsp.error.xml.invalidHighSurrogate = High surrogate bits in UTF-8 sequence must not exceed 0x10 but found 0x{0}.
1.21 +34 -1 jakarta-tomcat-jasper/jasper2/src/share/org/apache/jasper/resources/messages_es.properties
Index: messages_es.properties
===================================================================
RCS file: /home/cvs/jakarta-tomcat-jasper/jasper2/src/share/org/apache/jasper/resources/messages_es.properties,v
retrieving revision 1.20
retrieving revision 1.21
diff -u -r1.20 -r1.21
--- messages_es.properties 4 Nov 2002 19:15:33 -0000 1.20
+++ messages_es.properties 6 Nov 2002 20:14:20 -0000 1.21
@@ -223,3 +223,36 @@
jasper.error.emptybodycontent.nonempty=
jsp.error.tagfile.var_name_given_equals_attr_name=
jsp.error.useBean.noSession=
+jsp.error.xml.encodingByteOrderUnsupported=
+jsp.error.xml.encodingDeclInvalid=
+jsp.error.xml.encodingDeclRequired=
+jsp.error.xml.morePseudoAttributes=
+jsp.error.xml.noMorePseudoAttributes=
+jsp.error.xml.versionInfoRequired=
+jsp.error.xml.xmlDeclUnterminated=
+jsp.error.xml.reservedPITarget=
+jsp.error.xml.spaceRequiredInPI=
+jsp.error.xml.invalidCharInContent=
+jsp.error.xml.spaceRequiredBeforeStandalone=
+jsp.error.xml.sdDeclInvalid=
+jsp.error.xml.invalidCharInPI=
+jsp.error.xml.versionNotSupported=
+jsp.error.xml.pseudoAttrNameExpected=
+jsp.error.xml.expectedByte=
+jsp.error.xml.invalidByte=
+jsp.error.xml.operationNotSupported=
+jsp.error.xml.invalidHighSurrogate=
+jsp.error.xml.invalidASCII=
+jsp.error.xml.spaceRequiredBeforeEncodingInXMLDecl=
+jsp.error.xml.spaceRequiredBeforeEncodingInTextDecl=
+jsp.error.xml.spaceRequiredBeforeVersionInTextDecl=
+jsp.error.xml.spaceRequiredBeforeVersionInXMLDecl=
+jsp.error.xml.eqRequiredInXMLDecl=
+jsp.error.xml.eqRequiredInTextDecl=
+jsp.error.xml.quoteRequiredInTextDecl=
+jsp.error.xml.quoteRequiredInXMLDecl=
+jsp.error.xml.invalidCharInTextDecl=
+jsp.error.xml.invalidCharInXMLDecl=
+jsp.error.xml.closeQuoteMissingInTextDecl=
+jsp.error.xml.closeQuoteMissingInXMLDecl=
+jsp.error.xml.invalidHighSurrogate=
1.21 +34 -1 jakarta-tomcat-jasper/jasper2/src/share/org/apache/jasper/resources/messages_ja.properties
Index: messages_ja.properties
===================================================================
RCS file: /home/cvs/jakarta-tomcat-jasper/jasper2/src/share/org/apache/jasper/resources/messages_ja.properties,v
retrieving revision 1.20
retrieving revision 1.21
diff -u -r1.20 -r1.21
--- messages_ja.properties 4 Nov 2002 19:15:34 -0000 1.20
+++ messages_ja.properties 6 Nov 2002 20:14:20 -0000 1.21
@@ -254,3 +254,36 @@
jasper.error.emptybodycontent.nonempty=
jsp.error.tagfile.var_name_given_equals_attr_name=
jsp.error.useBean.noSession=
+jsp.error.xml.encodingByteOrderUnsupported=
+jsp.error.xml.encodingDeclInvalid=
+jsp.error.xml.encodingDeclRequired=
+jsp.error.xml.morePseudoAttributes=
+jsp.error.xml.noMorePseudoAttributes=
+jsp.error.xml.versionInfoRequired=
+jsp.error.xml.xmlDeclUnterminated=
+jsp.error.xml.reservedPITarget=
+jsp.error.xml.spaceRequiredInPI=
+jsp.error.xml.invalidCharInContent=
+jsp.error.xml.spaceRequiredBeforeStandalone=
+jsp.error.xml.sdDeclInvalid=
+jsp.error.xml.invalidCharInPI=
+jsp.error.xml.versionNotSupported=
+jsp.error.xml.pseudoAttrNameExpected=
+jsp.error.xml.expectedByte=
+jsp.error.xml.invalidByte=
+jsp.error.xml.operationNotSupported=
+jsp.error.xml.invalidHighSurrogate=
+jsp.error.xml.invalidASCII=
+jsp.error.xml.spaceRequiredBeforeEncodingInXMLDecl=
+jsp.error.xml.spaceRequiredBeforeEncodingInTextDecl=
+jsp.error.xml.spaceRequiredBeforeVersionInTextDecl=
+jsp.error.xml.spaceRequiredBeforeVersionInXMLDecl=
+jsp.error.xml.eqRequiredInXMLDecl=
+jsp.error.xml.eqRequiredInTextDecl=
+jsp.error.xml.quoteRequiredInTextDecl=
+jsp.error.xml.quoteRequiredInXMLDecl=
+jsp.error.xml.invalidCharInTextDecl=
+jsp.error.xml.invalidCharInXMLDecl=
+jsp.error.xml.closeQuoteMissingInTextDecl=
+jsp.error.xml.closeQuoteMissingInXMLDecl=
+jsp.error.xml.invalidHighSurrogate=
1.1 jakarta-tomcat-jasper/jasper2/src/share/org/apache/jasper/xmlparser/ASCIIReader.java
Index: ASCIIReader.java
===================================================================
/*
* The Apache Software License, Version 1.1
*
*
* Copyright (c) 2000-2002 The Apache Software Foundation. All rights
* reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* 3. The end-user documentation included with the redistribution,
* if any, must include the following acknowledgment:
* "This product includes software developed by the
* Apache Software Foundation (http://www.apache.org/)."
* Alternately, this acknowledgment may appear in the software itself,
* if and wherever such third-party acknowledgments normally appear.
*
* 4. The names "Xerces" and "Apache Software Foundation" must
* not be used to endorse or promote products derived from this
* software without prior written permission. For written
* permission, please contact apache@apache.org.
*
* 5. Products derived from this software may not be called "Apache",
* nor may "Apache" appear in their name, without prior written
* permission of the Apache Software Foundation.
*
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
* ====================================================================
*
* This software consists of voluntary contributions made by many
* individuals on behalf of the Apache Software Foundation and was
* originally based on software copyright (c) 1999, International
* Business Machines, Inc., http://www.apache.org. For more
* information on the Apache Software Foundation, please see
* <http://www.apache.org/>.
*/
package org.apache.jasper.xmlparser;
import java.io.InputStream;
import java.io.IOException;
import java.io.Reader;
import org.apache.jasper.compiler.ErrorDispatcher;
/**
* A simple ASCII byte reader. This is an optimized reader for reading
* byte streams that only contain 7-bit ASCII characters.
*
* @author Andy Clark, IBM
*
* @version $Id: ASCIIReader.java,v 1.1 2002/11/06 20:14:20 luehe Exp $
*/
public class ASCIIReader
    extends Reader {

    //
    // Constants
    //

    /** Default byte buffer size (2048). */
    public static final int DEFAULT_BUFFER_SIZE = 2048;

    //
    // Data
    //

    /** Input stream. */
    protected InputStream fInputStream;

    /** Byte buffer that block reads are staged in before conversion. */
    protected byte[] fBuffer;

    /** Error dispatcher used to obtain the text of error messages. */
    private ErrorDispatcher err;

    //
    // Constructors
    //

    /**
     * Constructs an ASCII reader from the specified input stream
     * and buffer size.
     *
     * @param inputStream The input stream.
     * @param size        The initial buffer size.
     * @param err         The error dispatcher.
     */
    public ASCIIReader(InputStream inputStream, int size,
                       ErrorDispatcher err) {
        fInputStream = inputStream;
        fBuffer = new byte[size];
        this.err = err;
    }

    //
    // Reader methods
    //

    /**
     * Read a single character.  This method will block until a character is
     * available, an I/O error occurs, or the end of the stream is reached.
     *
     * @return     The character read, as an integer in the range 0 to 127
     *             (<tt>0x00-0x7f</tt>), or -1 if the end of the stream has
     *             been reached
     *
     * @exception  IOException  If an I/O error occurs, or if the byte read
     *                          is outside the 7-bit ASCII range
     */
    public int read() throws IOException {
        int b0 = fInputStream.read();
        // InputStream.read() yields 0..255 or -1; anything from 0x80 up
        // (including 0x80 itself) is not 7-bit ASCII.
        if (b0 >= 0x80) {
            throw new IOException(err.getString("jsp.error.xml.invalidASCII",
                                                Integer.toString(b0)));
        }
        return b0;
    } // read():int

    /**
     * Read characters into a portion of an array.  This method will block
     * until some input is available, an I/O error occurs, or the end of the
     * stream is reached.  At most <code>fBuffer.length</code> characters are
     * read per call.
     *
     * @param      ch     Destination buffer
     * @param      offset Offset at which to start storing characters
     * @param      length Maximum number of characters to read
     *
     * @return     The number of characters read, or -1 if the end of the
     *             stream has been reached
     *
     * @exception  IOException  If an I/O error occurs, or if a byte outside
     *                          the 7-bit ASCII range is encountered
     */
    public int read(char ch[], int offset, int length) throws IOException {
        if (length > fBuffer.length) {
            length = fBuffer.length;
        }
        int count = fInputStream.read(fBuffer, 0, length);
        for (int i = 0; i < count; i++) {
            // Java bytes are signed: mask first so that 0x80-0xFF are seen
            // as 128-255 instead of negative values that would slip past
            // the range check and be stored as 0xFF80-0xFFFF.
            int b0 = fBuffer[i] & 0xFF;
            if (b0 >= 0x80) {
                throw new IOException(err.getString("jsp.error.xml.invalidASCII",
                                                    Integer.toString(b0)));
            }
            ch[offset + i] = (char)b0;
        }
        return count;
    } // read(char[],int,int)

    /**
     * Skip characters.  One ASCII character is one byte, so the request is
     * passed straight to the underlying input stream.
     *
     * @param  n  The number of characters to skip
     *
     * @return    The number of characters actually skipped
     *
     * @exception  IOException  If an I/O error occurs
     */
    public long skip(long n) throws IOException {
        return fInputStream.skip(n);
    } // skip(long):long

    /**
     * Tell whether this stream is ready to be read.
     *
     * @return Always false.  Note that returning false does not guarantee
     *         that the next read will block.
     *
     * @exception  IOException  If an I/O error occurs
     */
    public boolean ready() throws IOException {
        return false;
    } // ready()

    /**
     * Tell whether this stream supports the mark() operation.  The answer
     * is that of the underlying input stream.
     */
    public boolean markSupported() {
        return fInputStream.markSupported();
    } // markSupported()

    /**
     * Mark the present position in the stream.  Subsequent calls to reset()
     * will attempt to reposition the stream to this point.  The call is
     * delegated to the underlying input stream.
     *
     * @param  readAheadLimit  Limit on the number of characters that may be
     *                         read while still preserving the mark.  After
     *                         reading this many characters, attempting to
     *                         reset the stream may fail.
     *
     * @exception  IOException  If the stream does not support mark(),
     *                          or if some other I/O error occurs
     */
    public void mark(int readAheadLimit) throws IOException {
        fInputStream.mark(readAheadLimit);
    } // mark(int)

    /**
     * Reset the stream by delegating to the underlying input stream.
     *
     * @exception  IOException  If the stream has not been marked,
     *                          or if the mark has been invalidated,
     *                          or if the stream does not support reset(),
     *                          or if some other I/O error occurs
     */
    public void reset() throws IOException {
        fInputStream.reset();
    } // reset()

    /**
     * Close the stream by closing the underlying input stream.
     *
     * @exception  IOException  If an I/O error occurs
     */
    public void close() throws IOException {
        fInputStream.close();
    } // close()

} // class ASCIIReader
1.1 jakarta-tomcat-jasper/jasper2/src/share/org/apache/jasper/xmlparser/UCSReader.java
Index: UCSReader.java
===================================================================
/*
* The Apache Software License, Version 1.1
*
*
* Copyright (c) 2000-2002 The Apache Software Foundation. All rights
* reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* 3. The end-user documentation included with the redistribution,
* if any, must include the following acknowledgment:
* "This product includes software developed by the
* Apache Software Foundation (http://www.apache.org/)."
* Alternately, this acknowledgment may appear in the software itself,
* if and wherever such third-party acknowledgments normally appear.
*
* 4. The names "Xerces" and "Apache Software Foundation" must
* not be used to endorse or promote products derived from this
* software without prior written permission. For written
* permission, please contact apache@apache.org.
*
* 5. Products derived from this software may not be called "Apache",
* nor may "Apache" appear in their name, without prior written
* permission of the Apache Software Foundation.
*
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
* ====================================================================
*
* This software consists of voluntary contributions made by many
* individuals on behalf of the Apache Software Foundation and was
* originally based on software copyright (c) 1999, International
* Business Machines, Inc., http://www.apache.org. For more
* information on the Apache Software Foundation, please see
* <http://www.apache.org/>.
*/
package org.apache.jasper.xmlparser;
import java.io.InputStream;
import java.io.IOException;
import java.io.Reader;
/**
* Reader for UCS-2 and UCS-4 encodings.
* (i.e., encodings from ISO-10646-UCS-(2|4)).
*
* @author Neil Graham, IBM
*
* @version $Id: UCSReader.java,v 1.1 2002/11/06 20:14:20 luehe Exp $
*/
public class UCSReader extends Reader {

    //
    // Constants
    //

    /**
     * Default byte buffer size (8192, larger than that of ASCIIReader
     * since it's reasonable to surmise that the average UCS-4-encoded
     * file should be 4 times as large as the average ASCII-encoded file).
     */
    public static final int DEFAULT_BUFFER_SIZE = 8192;

    /** UCS-2, little-endian. */
    public static final short UCS2LE = 1;
    /** UCS-2, big-endian. */
    public static final short UCS2BE = 2;
    /** UCS-4, little-endian. */
    public static final short UCS4LE = 4;
    /** UCS-4, big-endian. */
    public static final short UCS4BE = 8;

    /** Widest character handled (UCS-4), in bytes. */
    private static final int MAX_BYTES_PER_CHAR = 4;

    //
    // Data
    //

    /** Input stream. */
    protected InputStream fInputStream;

    /** Byte buffer that block reads are staged in before decoding. */
    protected byte[] fBuffer;

    /** What kind of data we're dealing with: one of the UCS* constants. */
    protected short fEncoding;

    //
    // Constructors
    //

    /**
     * Constructs a UCS reader from the specified input stream
     * using the default buffer size.  The Endian-ness and whether this is
     * UCS-2 or UCS-4 needs also to be known in advance.
     *
     * @param inputStream The input stream.
     * @param encoding    One of UCS2LE, UCS2BE, UCS4LE or UCS4BE.
     */
    public UCSReader(InputStream inputStream, short encoding) {
        this(inputStream, DEFAULT_BUFFER_SIZE, encoding);
    } // <init>(InputStream, short)

    /**
     * Constructs a UCS reader from the specified input stream
     * and buffer size.  The Endian-ness and whether this is
     * UCS-2 or UCS-4 needs also to be known in advance.
     *
     * @param inputStream The input stream.
     * @param size        The initial buffer size.  Non-negative sizes
     *                    smaller than one UCS-4 character are raised to
     *                    4 bytes so that a block read can always hold at
     *                    least one whole character.
     * @param encoding    One of UCS2LE, UCS2BE, UCS4LE or UCS4BE.
     */
    public UCSReader(InputStream inputStream, int size, short encoding) {
        if (size >= 0 && size < MAX_BYTES_PER_CHAR) {
            size = MAX_BYTES_PER_CHAR;
        }
        fInputStream = inputStream;
        fBuffer = new byte[size];
        fEncoding = encoding;
    } // <init>(InputStream,int,short)

    //
    // Reader methods
    //

    /**
     * Read a single character.  This method will block until a character is
     * available, an I/O error occurs, or the end of the stream is reached.
     *
     * @return     The value assembled from the next two (UCS-2) or four
     *             (UCS-4) bytes of the stream, or -1 if the end of the
     *             stream is reached before a whole character was read
     *
     * @exception  IOException  If an I/O error occurs
     */
    public int read() throws IOException {
        // End of stream must be detected BEFORE masking: after "& 0xff"
        // a real 0xFF data byte and the -1 EOF marker look the same.
        int b0 = fInputStream.read();
        if (b0 == -1) {
            return -1;
        }
        int b1 = fInputStream.read();
        if (b1 == -1) {
            return -1;
        }
        b0 &= 0xff;
        b1 &= 0xff;
        if (fEncoding >= 4) { // UCS-4
            int b2 = fInputStream.read();
            if (b2 == -1) {
                return -1;
            }
            int b3 = fInputStream.read();
            if (b3 == -1) {
                return -1;
            }
            b2 &= 0xff;
            b3 &= 0xff;
            if (fEncoding == UCS4BE) {
                return (b0<<24)+(b1<<16)+(b2<<8)+b3;
            }
            return (b3<<24)+(b2<<16)+(b1<<8)+b0;
        }
        // UCS-2
        if (fEncoding == UCS2BE) {
            return (b0<<8)+b1;
        }
        return (b1<<8)+b0;
    } // read():int

    /**
     * Read characters into a portion of an array.  This method will block
     * until some input is available, an I/O error occurs, or the end of the
     * stream is reached.
     *
     * <p>NOTE: each UCS-4 value is narrowed to a <code>char</code>, so only
     * its low 16 bits are stored.
     *
     * @param      ch     Destination buffer
     * @param      offset Offset at which to start storing characters
     * @param      length Maximum number of characters to read
     *
     * @return     The number of characters read, or -1 if the end of the
     *             stream has been reached
     *
     * @exception  IOException  If an I/O error occurs
     */
    public int read(char ch[], int offset, int length) throws IOException {
        final int shift = (fEncoding >= 4) ? 2 : 1;
        final int mask = (1 << shift) - 1;

        // Ask for whole characters only and never more than the buffer can
        // hold.  Limiting the character count first also keeps
        // "length << shift" from overflowing, and guarantees that topping
        // up a short read below stays inside fBuffer.
        int maxChars = fBuffer.length >> shift;
        int charsWanted = (length < maxChars) ? length : maxChars;
        int count = fInputStream.read(fBuffer, 0, charsWanted << shift);
        if (count == -1) {
            return -1;
        }

        // The stream may hand back part of a character; fetch the missing
        // bytes one at a time, storing them directly after the bytes
        // already read.
        int missing = ((mask + 1) - (count & mask)) & mask;
        boolean eof = false;
        for (int i = 0; i < missing; i++) {
            int b = eof ? -1 : fInputStream.read();
            if (b == -1) {
                // end of input; something likely went wrong!
                // Pad the remainder of the character with nulls.
                eof = true;
                fBuffer[count + i] = 0;
            } else {
                fBuffer[count + i] = (byte)b;
            }
        }
        count += missing;

        // now count is a multiple of the right number of bytes
        int numChars = count >> shift;
        int curPos = 0;
        for (int i = 0; i < numChars; i++) {
            int b0 = fBuffer[curPos++] & 0xff;
            int b1 = fBuffer[curPos++] & 0xff;
            if (fEncoding >= 4) { // UCS-4
                int b2 = fBuffer[curPos++] & 0xff;
                int b3 = fBuffer[curPos++] & 0xff;
                if (fEncoding == UCS4BE) {
                    ch[offset+i] = (char)((b0<<24)+(b1<<16)+(b2<<8)+b3);
                } else {
                    ch[offset+i] = (char)((b3<<24)+(b2<<16)+(b1<<8)+b0);
                }
            } else { // UCS-2
                if (fEncoding == UCS2BE) {
                    ch[offset+i] = (char)((b0<<8)+b1);
                } else {
                    ch[offset+i] = (char)((b1<<8)+b0);
                }
            }
        }
        return numChars;
    } // read(char[],int,int)

    /**
     * Skip characters.  This method will block until some characters are
     * available, an I/O error occurs, or the end of the stream is reached.
     *
     * @param  n  The number of characters to skip
     *
     * @return    The number of characters actually skipped; a partially
     *            skipped character is counted as one
     *
     * @exception  IOException  If an I/O error occurs
     */
    public long skip(long n) throws IOException {
        // charWidth will represent the number of bits to move
        // n leftward to get num of bytes to skip, and then move the result
        // rightward to get num of chars effectively skipped.
        // The trick with &'ing, as with elsewhere in this code, is
        // intended to avoid an expensive use of / that might not be
        // optimized away.
        int charWidth = (fEncoding >= 4) ? 2 : 1;
        long bytesSkipped = fInputStream.skip(n << charWidth);
        // (charWidth | 1) is 1 for UCS-2 and 3 for UCS-4, i.e. exactly the
        // mask of the low bits that are set when a character was split.
        if ((bytesSkipped & (charWidth | 1)) == 0) {
            return bytesSkipped >> charWidth;
        }
        return (bytesSkipped >> charWidth) + 1;
    } // skip(long):long

    /**
     * Tell whether this stream is ready to be read.
     *
     * @return Always false.  Note that returning false does not guarantee
     *         that the next read will block.
     *
     * @exception  IOException  If an I/O error occurs
     */
    public boolean ready() throws IOException {
        return false;
    } // ready()

    /**
     * Tell whether this stream supports the mark() operation.  The answer
     * is that of the underlying input stream.
     */
    public boolean markSupported() {
        return fInputStream.markSupported();
    } // markSupported()

    /**
     * Mark the present position in the stream.  Subsequent calls to reset()
     * will attempt to reposition the stream to this point.  The call is
     * delegated, with the limit unchanged, to the underlying input stream.
     *
     * @param  readAheadLimit  Limit passed to the underlying stream's
     *                         mark() method.
     *
     * @exception  IOException  If the stream does not support mark(),
     *                          or if some other I/O error occurs
     */
    public void mark(int readAheadLimit) throws IOException {
        fInputStream.mark(readAheadLimit);
    } // mark(int)

    /**
     * Reset the stream by delegating to the underlying input stream.
     *
     * @exception  IOException  If the stream has not been marked,
     *                          or if the mark has been invalidated,
     *                          or if the stream does not support reset(),
     *                          or if some other I/O error occurs
     */
    public void reset() throws IOException {
        fInputStream.reset();
    } // reset()

    /**
     * Close the stream by closing the underlying input stream.
     *
     * @exception  IOException  If an I/O error occurs
     */
    public void close() throws IOException {
        fInputStream.close();
    } // close()

} // class UCSReader
1.1 jakarta-tomcat-jasper/jasper2/src/share/org/apache/jasper/xmlparser/UTF8Reader.java
Index: UTF8Reader.java
===================================================================
/*
* The Apache Software License, Version 1.1
*
*
* Copyright (c) 2000-2002 The Apache Software Foundation. All rights
* reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* 3. The end-user documentation included with the redistribution,
* if any, must include the following acknowledgment:
* "This product includes software developed by the
* Apache Software Foundation (http://www.apache.org/)."
* Alternately, this acknowledgment may appear in the software itself,
* if and wherever such third-party acknowledgments normally appear.
*
* 4. The names "Xerces" and "Apache Software Foundation" must
* not be used to endorse or promote products derived from this
* software without prior written permission. For written
* permission, please contact apache@apache.org.
*
* 5. Products derived from this software may not be called "Apache",
* nor may "Apache" appear in their name, without prior written
* permission of the Apache Software Foundation.
*
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
* ====================================================================
*
* This software consists of voluntary contributions made by many
* individuals on behalf of the Apache Software Foundation and was
* originally based on software copyright (c) 1999, International
* Business Machines, Inc., http://www.apache.org. For more
* information on the Apache Software Foundation, please see
* <http://www.apache.org/>.
*/
package org.apache.jasper.xmlparser;
import java.io.InputStream;
import java.io.IOException;
import java.io.Reader;
import java.io.UTFDataFormatException;
import org.apache.jasper.compiler.ErrorDispatcher;
/**
* @author Andy Clark, IBM
*
* @version $Id: UTF8Reader.java,v 1.1 2002/11/06 20:14:20 luehe Exp $
*/
public class UTF8Reader
extends Reader {
//
// Constants
//
/** Default byte buffer size (2048). */
public static final int DEFAULT_BUFFER_SIZE = 2048;
// debugging
/** Debug read. When true, read() prints each decoded character to System.out. */
private static final boolean DEBUG_READ = false;
//
// Data
//
/** Input stream the UTF-8 bytes are read from. */
protected InputStream fInputStream;
/**
 * Byte buffer. Block reads place the raw bytes here before decoding; its
 * first fOffset bytes hold bytes saved from a previous block read.
 */
protected byte[] fBuffer;
/**
 * Offset into buffer: the number of bytes saved at the start of fBuffer by
 * a block read that stopped at an incomplete or invalid sequence. Zero when
 * nothing is saved.
 */
protected int fOffset;
/**
 * Surrogate character: the low surrogate of a four-byte sequence whose high
 * surrogate has already been returned, or -1 when none is pending.
 */
private int fSurrogate = -1;
/** Error dispatcher supplied to the constructor. */
private ErrorDispatcher err;
//
// Constructors
//
/**
 * Creates a UTF-8 reader over the given input stream, allocating a byte
 * buffer of the requested size and remembering the error dispatcher.
 *
 * @param inputStream The input stream to read bytes from.
 * @param size        The size, in bytes, of the buffer to allocate.
 * @param err         The error dispatcher.
 */
public UTF8Reader(InputStream inputStream, int size, ErrorDispatcher err) {
    this.err = err;
    this.fBuffer = new byte[size];
    this.fInputStream = inputStream;
}
//
// Reader methods
//
/**
 * Read a single character.  This method will block until a character is
 * available, an I/O error occurs, or the end of the stream is reached.
 *
 * <p> A four-byte UTF-8 sequence is delivered as two results: this call
 * returns the high surrogate and stores the low surrogate in
 * <code>fSurrogate</code>, which the next call returns without touching
 * the stream.
 *
 * <p> NOTE(review): expectedByte, invalidByte and invalidSurrogate are
 * defined elsewhere in this class; the code below continues as if they
 * always throw -- confirm against their definitions.
 *
 * @return     The character read, as an integer in the range 0 to 65535
 *             (<tt>0x0000-0xffff</tt>), or -1 if the end of the stream has
 *             been reached
 *
 * @exception  IOException  If an I/O error occurs
 */
public int read() throws IOException {

    // decode character
    int c = fSurrogate;
    if (fSurrogate == -1) {
        // NOTE: We use the index into the buffer if there are remaining
        //       bytes from the last block read. -Ac
        int index = 0;

        // get first byte
        int b0 = index == fOffset
               ? fInputStream.read() : fBuffer[index++] & 0x00FF;
        if (b0 == -1) {
            return -1;
        }

        // UTF-8:   [0xxx xxxx]
        // Unicode: [0000 0000] [0xxx xxxx]
        if (b0 < 0x80) {
            c = (char)b0;
        }

        // UTF-8:   [110y yyyy] [10xx xxxx]
        // Unicode: [0000 0yyy] [yyxx xxxx]
        else if ((b0 & 0xE0) == 0xC0) {
            int b1 = index == fOffset
                   ? fInputStream.read() : fBuffer[index++] & 0x00FF;
            if (b1 == -1) {
                expectedByte(2, 2);
            }
            if ((b1 & 0xC0) != 0x80) {
                invalidByte(2, 2, b1);
            }
            c = ((b0 << 6) & 0x07C0) | (b1 & 0x003F);
        }

        // UTF-8:   [1110 zzzz] [10yy yyyy] [10xx xxxx]
        // Unicode: [zzzz yyyy] [yyxx xxxx]
        else if ((b0 & 0xF0) == 0xE0) {
            int b1 = index == fOffset
                   ? fInputStream.read() : fBuffer[index++] & 0x00FF;
            if (b1 == -1) {
                expectedByte(2, 3);
            }
            if ((b1 & 0xC0) != 0x80) {
                invalidByte(2, 3, b1);
            }
            int b2 = index == fOffset
                   ? fInputStream.read() : fBuffer[index++] & 0x00FF;
            if (b2 == -1) {
                expectedByte(3, 3);
            }
            if ((b2 & 0xC0) != 0x80) {
                invalidByte(3, 3, b2);
            }
            c = ((b0 << 12) & 0xF000) | ((b1 << 6) & 0x0FC0) |
                (b2 & 0x003F);
        }

        // UTF-8:   [1111 0uuu] [10uu zzzz] [10yy yyyy] [10xx xxxx]*
        // Unicode: [1101 10ww] [wwzz zzyy] (high surrogate)
        //          [1101 11yy] [yyxx xxxx] (low surrogate)
        //          * uuuuu = wwww + 1
        else if ((b0 & 0xF8) == 0xF0) {
            int b1 = index == fOffset
                   ? fInputStream.read() : fBuffer[index++] & 0x00FF;
            if (b1 == -1) {
                expectedByte(2, 4);
            }
            if ((b1 & 0xC0) != 0x80) {
                // second argument is the sequence length: 4 here, matching
                // expectedByte(2, 4) above and the block-read method
                invalidByte(2, 4, b1);
            }
            int b2 = index == fOffset
                   ? fInputStream.read() : fBuffer[index++] & 0x00FF;
            if (b2 == -1) {
                expectedByte(3, 4);
            }
            if ((b2 & 0xC0) != 0x80) {
                invalidByte(3, 4, b2);
            }
            int b3 = index == fOffset
                   ? fInputStream.read() : fBuffer[index++] & 0x00FF;
            if (b3 == -1) {
                expectedByte(4, 4);
            }
            if ((b3 & 0xC0) != 0x80) {
                invalidByte(4, 4, b3);
            }
            int uuuuu = ((b0 << 2) & 0x001C) | ((b1 >> 4) & 0x0003);
            if (uuuuu > 0x10) {
                invalidSurrogate(uuuuu);
            }
            int wwww = uuuuu - 1;
            int hs = 0xD800 |
                     ((wwww << 6) & 0x03C0) | ((b1 << 2) & 0x003C) |
                     ((b2 >> 4) & 0x0003);
            int ls = 0xDC00 | ((b2 << 6) & 0x03C0) | (b3 & 0x003F);
            // hand out the high surrogate now, keep the low one for the
            // next call
            c = hs;
            fSurrogate = ls;
        }

        // error
        else {
            invalidByte(1, 1, b0);
        }
    }

    // use surrogate
    else {
        fSurrogate = -1;
    }

    // return character
    if (DEBUG_READ) {
        System.out.println("read(): 0x"+Integer.toHexString(c));
    }
    return c;

} // read():int
/**
* Read characters into a portion of an array. This method will block
* until some input is available, an I/O error occurs, or the end of the
* stream is reached.
*
* @param ch Destination buffer
* @param offset Offset at which to start storing characters
* @param length Maximum number of characters to read
*
* @return The number of characters read, or -1 if the end of the
* stream has been reached
*
* @exception IOException If an I/O error occurs
*/
public int read(char ch[], int offset, int length) throws IOException {
// handle surrogate
int out = offset;
if (fSurrogate != -1) {
ch[offset + 1] = (char)fSurrogate;
fSurrogate = -1;
length--;
out++;
}
// read bytes
int count = 0;
if (fOffset == 0) {
// adjust length to read
if (length > fBuffer.length) {
length = fBuffer.length;
}
// perform read operation
count = fInputStream.read(fBuffer, 0, length);
if (count == -1) {
return -1;
}
count += out - offset;
}
// skip read; last character was in error
// NOTE: Having an offset value other than zero means that there was
// an error in the last character read. In this case, we have
// skipped the read so we don't consume any bytes past the
// error. By signalling the error on the next block read we
// allow the method to return the most valid characters that
// it can on the previous block read. -Ac
else {
count = fOffset;
fOffset = 0;
}
// convert bytes to characters
final int total = count;
for (int in = 0; in < total; in++) {
int b0 = fBuffer[in] & 0x00FF;
// UTF-8: [0xxx xxxx]
// Unicode: [0000 0000] [0xxx xxxx]
if (b0 < 0x80) {
ch[out++] = (char)b0;
continue;
}
// UTF-8: [110y yyyy] [10xx xxxx]
// Unicode: [0000 0yyy] [yyxx xxxx]
if ((b0 & 0xE0) == 0xC0) {
int b1 = -1;
if (++in < total) {
b1 = fBuffer[in] & 0x00FF;
}
else {
b1 = fInputStream.read();
if (b1 == -1) {
if (out > offset) {
fBuffer[0] = (byte)b0;
fOffset = 1;
return out - offset;
}
expectedByte(2, 2);
}
count++;
}
if ((b1 & 0xC0) != 0x80) {
if (out > offset) {
fBuffer[0] = (byte)b0;
fBuffer[1] = (byte)b1;
fOffset = 2;
return out - offset;
}
invalidByte(2, 2, b1);
}
int c = ((b0 << 6) & 0x07C0) | (b1 & 0x003F);
ch[out++] = (char)c;
count -= 1;
continue;
}
// UTF-8: [1110 zzzz] [10yy yyyy] [10xx xxxx]
// Unicode: [zzzz yyyy] [yyxx xxxx]
if ((b0 & 0xF0) == 0xE0) {
int b1 = -1;
if (++in < total) {
b1 = fBuffer[in] & 0x00FF;
}
else {
b1 = fInputStream.read();
if (b1 == -1) {
if (out > offset) {
fBuffer[0] = (byte)b0;
fOffset = 1;
return out - offset;
}
expectedByte(2, 3);
}
count++;
}
if ((b1 & 0xC0) != 0x80) {
if (out > offset) {
fBuffer[0] = (byte)b0;
fBuffer[1] = (byte)b1;
fOffset = 2;
return out - offset;
}
invalidByte(2, 3, b1);
}
int b2 = -1;
if (++in < total) {
b2 = fBuffer[in] & 0x00FF;
}
else {
b2 = fInputStream.read();
if (b2 == -1) {
if (out > offset) {
fBuffer[0] = (byte)b0;
fBuffer[1] = (byte)b1;
fOffset = 2;
return out - offset;
}
expectedByte(3, 3);
}
count++;
}
if ((b2 & 0xC0) != 0x80) {
if (out > offset) {
fBuffer[0] = (byte)b0;
fBuffer[1] = (byte)b1;
fBuffer[2] = (byte)b2;
fOffset = 3;
return out - offset;
}
invalidByte(3, 3, b2);
}
int c = ((b0 << 12) & 0xF000) | ((b1 << 6) & 0x0FC0) |
(b2 & 0x003F);
ch[out++] = (char)c;
count -= 2;
continue;
}
// UTF-8: [1111 0uuu] [10uu zzzz] [10yy yyyy] [10xx xxxx]*
// Unicode: [1101 10ww] [wwzz zzyy] (high surrogate)
// [1101 11yy] [yyxx xxxx] (low surrogate)
// * uuuuu = wwww + 1
if ((b0 & 0xF8) == 0xF0) {
int b1 = -1;
if (++in < total) {
b1 = fBuffer[in] & 0x00FF;
}
else {
b1 = fInputStream.read();
if (b1 == -1) {
if (out > offset) {
fBuffer[0] = (byte)b0;
fOffset = 1;
return out - offset;
}
expectedByte(2, 4);
}
count++;
}
if ((b1 & 0xC0) != 0x80) {
if (out > offset) {
fBuffer[0] = (byte)b0;
fBuffer[1] = (byte)b1;
fOffset = 2;
return out - offset;
}
invalidByte(2, 4, b1);
}
int b2 = -1;
if (++in < total) {
b2 = fBuffer[in] & 0x00FF;
}
else {
b2 = fInputStream.read();
if (b2 == -1) {
if (out > offset) {
fBuffer[0] = (byte)b0;
fBuffer[1] = (byte)b1;
fOffset = 2;
return out - offset;
}
expectedByte(3, 4);
}
count++;
}
if ((b2 & 0xC0) != 0x80) {
if (out > offset) {
fBuffer[0] = (byte)b0;
fBuffer[1] = (byte)b1;
fBuffer[2] = (byte)b2;
fOffset = 3;
return out - offset;
}
invalidByte(3, 4, b2);
}
int b3 = -1;
if (++in < total) {
b3 = fBuffer[in] & 0x00FF;
}
else {
b3 = fInputStream.read();
if (b3 == -1) {
if (out > offset) {
fBuffer[0] = (byte)b0;
fBuffer[1] = (byte)b1;
fBuffer[2] = (byte)b2;
fOffset = 3;
return out - offset;
}
expectedByte(4, 4);
}
count++;
}
if ((b3 & 0xC0) != 0x80) {
if (out > offset) {
fBuffer[0] = (byte)b0;
fBuffer[1] = (byte)b1;
fBuffer[2] = (byte)b2;
fBuffer[3] = (byte)b3;
fOffset = 4;
return out - offset;
}
invalidByte(4, 4, b2);
}
// decode bytes into surrogate characters
int uuuuu = ((b0 << 2) & 0x001C) | ((b1 >> 4) & 0x0003);
if (uuuuu > 0x10) {
invalidSurrogate(uuuuu);
}
int wwww = uuuuu - 1;
int zzzz = b1 & 0x000F;
int yyyyyy = b2 & 0x003F;
int xxxxxx = b3 & 0x003F;
int hs = 0xD800 | ((wwww << 6) & 0x03C0) | (zzzz << 2) | (yyyyyy >> 4);
int ls = 0xDC00 | ((yyyyyy << 6) & 0x03C0) | xxxxxx;
// set characters
ch[out++] = (char)hs;
ch[out++] = (char)ls;
count -= 2;
continue;
}
// error
if (out > offset) {
fBuffer[0] = (byte)b0;
fOffset = 1;
return out - offset;
}
invalidByte(1, 1, b0);
}
// return number of characters converted
if (DEBUG_READ) {
System.out.println("read(char[],"+offset+','+length+"): count="+count);
}
return count;
} // read(char[],int,int)
/**
 * Skips characters by decoding them into a scratch buffer with
 * {@link #read(char[],int,int)} and discarding the result. This method
 * will block until some characters are available, an I/O error occurs,
 * or the end of the stream is reached.
 *
 * @param n The number of characters to skip; must not be negative
 *
 * @return The number of characters actually skipped (0 when n is 0)
 *
 * @exception IllegalArgumentException If n is negative, as specified by
 *                                     java.io.Reader#skip(long)
 * @exception IOException If an I/O error occurs
 */
public long skip(long n) throws IOException {
    // java.io.Reader.skip() specifies IllegalArgumentException here; without
    // this guard a negative length would be handed to read().
    if (n < 0L) {
        throw new IllegalArgumentException("skip value is negative");
    }
    // nothing to do: avoid allocating the scratch buffer and touching the stream
    if (n == 0L) {
        return 0L;
    }
    long remaining = n;
    // The scratch buffer deliberately stays as large as the byte buffer, exactly
    // as before: read() may emit a surrogate pair for one 4-byte sequence.
    final char[] ch = new char[fBuffer.length];
    do {
        int length = ch.length < remaining ? ch.length : (int)remaining;
        int count = read(ch, 0, length);
        if (count > 0) {
            remaining -= count;
        }
        else {
            // end of stream (or nothing decoded): stop skipping
            break;
        }
    } while (remaining > 0);
    return n - remaining;
} // skip(long):long
/**
* Tell whether this stream is ready to be read.
* <p>
* This implementation performs no check on the underlying stream and
* always answers false.
*
* @return Always false. Per the java.io.Reader contract, returning false
* does not guarantee that the next read will block.
*
* @exception IOException Declared for the Reader contract; the visible
* body never throws it
*/
public boolean ready() throws IOException {
return false;
} // ready()
/**
* Tell whether this stream supports the mark() operation.
*
* @return Always false; mark(int) in this class throws an IOException.
*/
public boolean markSupported() {
return false;
} // markSupported()
/**
 * Marking is not supported by this reader: every call fails.
 *
 * @param readAheadLimit Ignored by this implementation
 *
 * @exception IOException Always thrown; the message is the localized
 *                        "jsp.error.xml.operationNotSupported" text for
 *                        the operation "mark()" and the encoding "UTF-8"
 */
public void mark(int readAheadLimit) throws IOException {
    // build the localized message first, then signal the failure
    final String message =
        err.getString("jsp.error.xml.operationNotSupported", "mark()", "UTF-8");
    throw new IOException(message);
}
/**
 * Resets the decoder state kept by this reader: the pending-surrogate
 * slot is cleared (fSurrogate = -1) and the carried-over byte count is
 * zeroed (fOffset = 0).
 * <p>
 * The visible body does not touch the underlying input stream and never
 * throws, even though markSupported() reports false.
 *
 * @exception IOException Declared for the java.io.Reader contract only
 */
public void reset() throws IOException {
    // forget any pending surrogate
    fSurrogate = -1;
    // forget any bytes carried over from a previous read
    fOffset = 0;
} // reset()
/**
* Close the stream by closing the wrapped input stream (fInputStream).
* No other state of this reader is changed by this method.
*
* @exception IOException If closing the underlying input stream fails
*/
public void close() throws IOException {
// delegate directly to the wrapped byte stream
fInputStream.close();
} // close()
//
// Private methods
//
/**
 * Reports that the input ended before a multi-byte sequence was complete.
 *
 * @param position 1-based index of the byte that was expected
 * @param count    total number of bytes in the sequence
 *
 * @throws UTFDataFormatException Always; carries the localized
 *                                "jsp.error.xml.expectedByte" message
 */
private void expectedByte(int position, int count)
    throws UTFDataFormatException {
    final String missingIndex = String.valueOf(position);
    final String sequenceLength = String.valueOf(count);
    final String message =
        err.getString("jsp.error.xml.expectedByte", missingIndex, sequenceLength);
    throw new UTFDataFormatException(message);
} // expectedByte(int,int)
/**
 * Reports a byte that is not valid at its position in a multi-byte
 * sequence.
 *
 * @param position 1-based index of the offending byte
 * @param count    total number of bytes in the sequence
 * @param c        the offending byte value; accepted but not included in
 *                 the message built here
 *
 * @throws UTFDataFormatException Always; carries the localized
 *                                "jsp.error.xml.invalidByte" message
 */
private void invalidByte(int position, int count, int c)
    throws UTFDataFormatException {
    final String badIndex = String.valueOf(position);
    final String sequenceLength = String.valueOf(count);
    final String message =
        err.getString("jsp.error.xml.invalidByte", badIndex, sequenceLength);
    throw new UTFDataFormatException(message);
} // invalidByte(int,int,int)
/**
 * Reports surrogate bits that are out of range in a 4-byte sequence.
 *
 * @param uuuuu the offending bit field, reported in hexadecimal
 *
 * @throws UTFDataFormatException Always; carries the localized
 *                                "jsp.error.xml.invalidHighSurrogate" message
 */
private void invalidSurrogate(int uuuuu) throws UTFDataFormatException {
    final String hexBits = Integer.toHexString(uuuuu);
    final String message =
        err.getString("jsp.error.xml.invalidHighSurrogate", hexBits);
    throw new UTFDataFormatException(message);
} // invalidSurrogate(int)
} // class UTF8Reader
1.1 jakarta-tomcat-jasper/jasper2/src/share/org/apache/jasper/xmlparser/XMLEncodingDetector.java
Index: XMLEncodingDetector.java
===================================================================
/*
* The Apache Software License, Version 1.1
*
*
* Copyright (c) 2000-2002 The Apache Software Foundation. All rights
* reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* 3. The end-user documentation included with the redistribution,
* if any, must include the following acknowledgment:
* "This product includes software developed by the
* Apache Software Foundation (http://www.apache.org/)."
* Alternately, this acknowledgment may appear in the software itself,
* if and wherever such third-party acknowledgments normally appear.
*
* 4. The names "Xerces" and "Apache Software Foundation" must
* not be used to endorse or promote products derived from this
* software without prior written permission. For written
* permission, please contact apache@apache.org.
*
* 5. Products derived from this software may not be called "Apache",
* nor may "Apache" appear in their name, without prior written
* permission of the Apache Software Foundation.
*
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
* ====================================================================
*
* This software consists of voluntary contributions made by many
* individuals on behalf of the Apache Software Foundation and was
* originally based on software copyright (c) 1999, International
* Business Machines, Inc., http://www.apache.org. For more
* information on the Apache Software Foundation, please see
* <http://www.apache.org/>.
*/
package org.apache.jasper.xmlparser;
import java.io.EOFException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.IOException;
import java.io.Reader;
import java.util.Locale;
import java.util.jar.JarFile;
import org.apache.jasper.JasperException;
import org.apache.jasper.JspCompilationContext;
import org.apache.jasper.compiler.ErrorDispatcher;
import org.apache.jasper.compiler.JspUtil;
import org.apache.xerces.util.EncodingMap;
import org.apache.xerces.util.SymbolTable;
import org.apache.xerces.util.XMLChar;
import org.apache.xerces.util.XMLStringBuffer;
import org.apache.xerces.xni.XMLString;
public class XMLEncodingDetector {
// Byte stream being inspected; createInitialReader() re-wraps it in a
// RewindableInputStream.
private InputStream stream;
// Detected encoding name; returned as the first element by getEncoding().
private String encoding;
// Returned as the second element by getEncoding().
private boolean isEncodingSetInProlog;
// Byte order reported by getEncodingName(); null when not relevant/unknown.
private Boolean isBigEndian;
// Character reader built by createReader() for the detected encoding.
private Reader reader;
// org.apache.xerces.impl.XMLEntityManager fields
public static final int DEFAULT_BUFFER_SIZE = 2048;
public static final int DEFAULT_XMLDECL_BUFFER_SIZE = 64;
// Consulted by createReader(); never assigned in the code visible here, so
// it keeps its default (false) -- NOTE(review): confirm no setter exists.
private boolean fAllowJavaEncodings;
// Created in the constructor; scanName() interns names through it.
private SymbolTable fSymbolTable;
// The constructor points this at the detector itself.
private XMLEncodingDetector fCurrentEntity;
// Size handed to the optimized readers; doubled by scanName() when it has
// to grow the character buffer.
private int fBufferSize = DEFAULT_BUFFER_SIZE;
// org.apache.xerces.impl.XMLEntityManager.ScannedEntity fields
// Current line/column, maintained by the scan*/skip* methods.
private int lineNumber = 1;
private int columnNumber = 1;
// Read by scanLiteral() when deciding whether a quote ends the literal.
private boolean literal;
// Character buffer, the index of the next character to scan, and the limit
// the scanners compare position against before calling load().
private char[] ch = new char[DEFAULT_BUFFER_SIZE];
private int position;
private int count;
private boolean mayReadChunks = false;
// org.apache.xerces.impl.XMLScanner fields
private XMLString fString = new XMLString();
private XMLStringBuffer fStringBuffer = new XMLStringBuffer();
private XMLStringBuffer fStringBuffer2 = new XMLStringBuffer();
private final static String fVersionSymbol = "version";
private final static String fEncodingSymbol = "encoding";
private final static String fStandaloneSymbol = "standalone";
// org.apache.xerces.impl.XMLDocumentFragmentScannerImpl fields
private int fMarkupDepth = 0;
private String[] fStrings = new String[3];
// Error dispatcher used to report encoding problems (see createReader()).
private ErrorDispatcher err;
/**
 * Autodetects the encoding of the XML document supplied by the given
 * input stream.
 *
 * Encoding autodetection is done according to the XML 1.0 specification,
 * Appendix F.1: Detection Without External Encoding Information.
 * <p>
 * The given stream is read but not closed by this method.
 *
 * @param in The input stream to read
 * @param err The error dispatcher
 *
 * @return Two-element array, where the first element (of type
 * java.lang.String) contains the name of the autodetected encoding, and
 * the second element (of type java.lang.Boolean) specifies whether the
 * encoding was specified by the encoding attribute of an XML declaration
 * (prolog).
 *
 * @throws IOException If reading from the stream fails
 * @throws JasperException If an error is reported through the dispatcher
 */
public static Object[] getEncoding(InputStream in, ErrorDispatcher err)
    throws IOException, JasperException
{
    XMLEncodingDetector detector = new XMLEncodingDetector(in, err);
    // sniff the first bytes and build a matching reader ...
    detector.createInitialReader();
    // ... then let the XML declaration, if any, refine the result
    detector.scanXMLDecl();
    // use the canonical Boolean instances rather than allocating a wrapper
    Boolean setInProlog =
        detector.isEncodingSetInProlog ? Boolean.TRUE : Boolean.FALSE;
    return new Object[] { detector.encoding, setInProlog };
}
/**
 * Autodetects the encoding of the named resource.
 * <p>
 * The resource is opened through JspUtil.getInputStream() and is always
 * closed before this method returns, including when detection fails.
 *
 * @param fname   Name of the resource to inspect
 * @param jarFile JAR file handed to JspUtil.getInputStream()
 * @param ctxt    Compilation context handed to JspUtil.getInputStream()
 * @param err     The error dispatcher
 *
 * @return The two-element array produced by
 * getEncoding(InputStream, ErrorDispatcher)
 *
 * @throws IOException If reading or closing the stream fails
 * @throws JasperException If an error is reported through the dispatcher
 */
public static Object[] getEncoding(String fname, JarFile jarFile,
                                   JspCompilationContext ctxt,
                                   ErrorDispatcher err)
    throws IOException, JasperException
{
    InputStream inStream = JspUtil.getInputStream(fname, jarFile,
                                                  ctxt, err);
    try {
        return getEncoding(inStream, err);
    } finally {
        // previously the stream leaked whenever detection threw
        inStream.close();
    }
}
/**
 * Creates a detector for the given byte stream.
 * <p>
 * A fresh SymbolTable is allocated and the detector registers itself as
 * the current entity.
 *
 * @param stream The input stream whose encoding is to be detected
 * @param err    The error dispatcher used for reporting
 */
public XMLEncodingDetector(InputStream stream, ErrorDispatcher err) {
    // the detector plays the role of its own "current entity"
    fCurrentEntity = this;
    fSymbolTable = new SymbolTable();
    this.err = err;
    this.stream = stream;
}
// stub method: intentionally empty, does nothing when called
void endEntity() {
}
// Adapted from:
// org.apache.xerces.impl.XMLEntityManager.startEntity()
/**
 * Wraps the input in a RewindableInputStream, sniffs up to four leading
 * bytes to guess the encoding, rewinds, and creates the initial reader.
 * <p>
 * Nothing beyond the wrapping happens when an encoding is already set.
 *
 * @throws IOException If reading from the stream fails
 * @throws JasperException If createReader() reports an error
 */
private void createInitialReader() throws IOException, JasperException {
    // wrap this stream in RewindableInputStream
    stream = new RewindableInputStream(stream);
    // perform auto-detect of encoding if necessary
    if (encoding == null) {
        // Read at most four bytes, stopping at end of stream. The previous
        // loop stored read()'s -1 as 0xFF and always reported four bytes,
        // so a short stream could be mis-detected (e.g. a lone 0xFE byte
        // looked like a UTF-16BE byte order mark).
        final byte[] b4 = new byte[4];
        int count = 0;
        while (count < 4) {
            int b = stream.read();
            if (b == -1) {
                break;
            }
            b4[count++] = (byte)b;
        }
        // getEncodingName() falls back to UTF-8 when too few bytes were read
        Object [] encodingDesc = getEncodingName(b4, count);
        encoding = (String)(encodingDesc[0]);
        isBigEndian = (Boolean)(encodingDesc[1]);
        // rewind so the reader sees the sniffed bytes again
        stream.reset();
        // Special case UTF-8 files with BOM created by Microsoft
        // tools. It's more efficient to consume the BOM than make
        // the reader perform extra checks. -Ac
        if (count > 2 && encoding.equals("UTF-8")) {
            int b0 = b4[0] & 0xFF;
            int b1 = b4[1] & 0xFF;
            int b2 = b4[2] & 0xFF;
            if (b0 == 0xEF && b1 == 0xBB && b2 == 0xBF) {
                // ignore first three bytes...
                stream.skip(3);
            }
        }
        reader = createReader(stream, encoding, isBigEndian);
    }
}
// Adapted from:
// org.apache.xerces.impl.XMLEntityManager.createReader
/**
 * Builds a character reader for the given byte stream and encoding name.
 * <p>
 * UTF-8, US-ASCII and the ISO-10646 UCS-2/UCS-4 encodings get dedicated
 * readers; every other name is mapped to a Java encoding and served by an
 * InputStreamReader.
 *
 * @param inputStream The input stream.
 * @param encoding    The encoding name that the input stream is encoded
 *                    using; null is treated as "UTF-8". If Java encoding
 *                    names are allowed it may be a Java encoding name,
 *                    otherwise it is an IANA encoding name.
 * @param isBigEndian For encodings (like UCS-4) whose names cannot
 *                    specify a byte order, this tells whether the order
 *                    is big-endian. null means unknown or not relevant.
 *
 * @return Returns a reader.
 *
 * @throws IOException If the InputStreamReader cannot be created
 * @throws JasperException If an error is reported through the dispatcher
 */
private Reader createReader(InputStream inputStream, String encoding,
                            Boolean isBigEndian)
    throws IOException, JasperException {
    // a missing name defaults to UTF-8
    if (encoding == null) {
        encoding = "UTF-8";
    }
    // all name comparisons below use the upper-cased form
    final String upperName = encoding.toUpperCase(Locale.ENGLISH);

    // dedicated readers first
    if (upperName.equals("UTF-8")) {
        return new UTF8Reader(inputStream, fBufferSize, err);
    }
    if (upperName.equals("US-ASCII")) {
        return new ASCIIReader(inputStream, fBufferSize, err);
    }
    if (upperName.equals("ISO-10646-UCS-4")) {
        if (isBigEndian == null) {
            err.jspError("jsp.error.xml.encodingByteOrderUnsupported",
                         encoding);
        } else {
            return new UCSReader(inputStream,
                                 isBigEndian.booleanValue()
                                     ? UCSReader.UCS4BE
                                     : UCSReader.UCS4LE);
        }
    }
    if (upperName.equals("ISO-10646-UCS-2")) {
        // a null byte order should never happen with this encoding...
        if (isBigEndian == null) {
            err.jspError("jsp.error.xml.encodingByteOrderUnsupported",
                         encoding);
        } else {
            return new UCSReader(inputStream,
                                 isBigEndian.booleanValue()
                                     ? UCSReader.UCS2BE
                                     : UCSReader.UCS2LE);
        }
    }

    // validate the name (both checks are always evaluated)
    final boolean ianaOk = XMLChar.isValidIANAEncoding(encoding);
    final boolean javaOk = XMLChar.isValidJavaEncoding(encoding);
    final boolean rejected = !ianaOk || (fAllowJavaEncodings && !javaOk);
    if (rejected) {
        err.jspError("jsp.error.xml.encodingDeclInvalid", encoding);
        // NOTE: AndyH suggested that, on failure, we use ISO Latin 1
        //       because every byte is a valid ISO Latin 1 character.
        //       It may not translate correctly but if we failed on
        //       the encoding anyway, then we're expecting the content
        //       of the document to be bad. This will just prevent an
        //       invalid UTF-8 sequence to be detected. This is only
        //       important when continue-after-fatal-error is turned
        //       on. -Ac
        encoding = "ISO-8859-1";
    }

    // fall back to a reader supplied by the Java runtime; the lookup uses
    // the upper-cased name computed before any fallback above
    String javaName = EncodingMap.getIANA2JavaMapping(upperName);
    if (javaName == null) {
        if (!fAllowJavaEncodings) {
            err.jspError("jsp.error.xml.encodingDeclInvalid", encoding);
            // same ISO Latin 1 fallback as above
            javaName = "ISO8859_1";
        } else {
            javaName = encoding;
        }
    }
    return new InputStreamReader(inputStream, javaName);
} // createReader(InputStream,String, Boolean): Reader
// Adapted from:
// org.apache.xerces.impl.XMLEntityManager.getEncodingName
/**
 * Returns the IANA encoding name that is auto-detected from
 * the bytes specified, with the endian-ness of that encoding where
 * appropriate.
 * <p>
 * Checks are made in this order: UTF-16 byte order marks (needs 2 bytes),
 * UTF-8 byte order mark (needs 3 bytes), then the 4-byte signatures of
 * "&lt;" / "&lt;?" in UCS-4, UTF-16 and EBCDIC. Anything else, or too few
 * bytes, yields UTF-8.
 *
 * @param b4    The first four bytes of the input.
 * @param count The number of bytes actually read.
 * @return a 2-element array: the first element, an IANA-encoding string,
 *  the second element a Boolean which is true iff the document is big
 *  endian, false if it's little-endian, and null if the distinction isn't
 *  relevant.
 */
private Object[] getEncodingName(byte[] b4, int count) {
    if (count < 2) {
        return new Object[]{"UTF-8", null};
    }
    // UTF-16, with BOM
    int b0 = b4[0] & 0xFF;
    int b1 = b4[1] & 0xFF;
    if (b0 == 0xFE && b1 == 0xFF) {
        // UTF-16, big-endian
        return new Object [] {"UTF-16BE", Boolean.TRUE};
    }
    if (b0 == 0xFF && b1 == 0xFE) {
        // UTF-16, little-endian
        return new Object [] {"UTF-16LE", Boolean.FALSE};
    }
    // default to UTF-8 if we don't have enough bytes to make a
    // good determination of the encoding
    if (count < 3) {
        return new Object [] {"UTF-8", null};
    }
    // UTF-8 with a BOM
    int b2 = b4[2] & 0xFF;
    if (b0 == 0xEF && b1 == 0xBB && b2 == 0xBF) {
        return new Object [] {"UTF-8", null};
    }
    // default to UTF-8 if we don't have enough bytes to make a
    // good determination of the encoding
    if (count < 4) {
        return new Object [] {"UTF-8", null};
    }
    // other encodings
    int b3 = b4[3] & 0xFF;
    if (b0 == 0x00 && b1 == 0x00 && b2 == 0x00 && b3 == 0x3C) {
        // UCS-4, big endian (1234)
        return new Object [] {"ISO-10646-UCS-4", Boolean.TRUE};
    }
    if (b0 == 0x3C && b1 == 0x00 && b2 == 0x00 && b3 == 0x00) {
        // UCS-4, little endian (4321)
        return new Object [] {"ISO-10646-UCS-4", Boolean.FALSE};
    }
    if (b0 == 0x00 && b1 == 0x00 && b2 == 0x3C && b3 == 0x00) {
        // UCS-4, unusual octet order (2143)
        // REVISIT: What should this be?
        return new Object [] {"ISO-10646-UCS-4", null};
    }
    if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x00) {
        // UCS-4, unusual octet order (3412)
        // REVISIT: What should this be?
        return new Object [] {"ISO-10646-UCS-4", null};
    }
    if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x3F) {
        // UTF-16, big-endian, no BOM
        // (or could turn out to be UCS-2...)
        // REVISIT: What should this be?
        return new Object [] {"UTF-16BE", Boolean.TRUE};
    }
    if (b0 == 0x3C && b1 == 0x00 && b2 == 0x3F && b3 == 0x00) {
        // UTF-16, little-endian, no BOM
        // (or could turn out to be UCS-2...)
        return new Object [] {"UTF-16LE", Boolean.FALSE};
    }
    if (b0 == 0x4C && b1 == 0x6F && b2 == 0xA7 && b3 == 0x94) {
        // EBCDIC
        // a la xerces1, return CP037 instead of EBCDIC here
        return new Object [] {"CP037", null};
    }
    // default encoding
    return new Object [] {"UTF-8", null};
}
// Adapted from:
// org.apache.xerces.impl.XMLEntityManager.EntityScanner.isExternal
/**
* Returns true if the current entity being scanned is external.
* This implementation always returns true.
*/
public boolean isExternal() {
return true;
}
// Adapted from:
// org.apache.xerces.impl.XMLEntityManager.EntityScanner.peekChar
/**
 * Looks at the next character on the input without consuming it.
 * <p>
 * For an external entity a carriage return is reported as a line feed.
 *
 * @return The next character, with '\r' mapped to '\n' when the current
 *         entity is external.
 *
 * @throws IOException  Thrown if i/o error occurs.
 * @throws EOFException Thrown on end of file.
 */
public int peekChar() throws IOException {
    // refill the buffer once it has been fully consumed
    if (fCurrentEntity.position == fCurrentEntity.count) {
        load(0, true);
    }
    // look at the character; the position is left untouched
    final int peeked = fCurrentEntity.ch[fCurrentEntity.position];
    final boolean external = fCurrentEntity.isExternal();
    return (external && peeked == '\r') ? '\n' : peeked;
} // peekChar():int
// Adapted from:
// org.apache.xerces.impl.XMLEntityManager.EntityScanner.scanChar
/**
 * Consumes and returns the next character on the input.
 * <p>
 * Line and column counters are maintained here. For an external entity a
 * carriage return, optionally followed by a line feed, is consumed as a
 * unit and reported as a single '\n'.
 *
 * @return The character that was scanned.
 *
 * @throws IOException  Thrown if i/o error occurs.
 * @throws EOFException Thrown on end of file.
 */
public int scanChar() throws IOException {
    // refill the buffer once it has been fully consumed
    if (fCurrentEntity.position == fCurrentEntity.count) {
        load(0, true);
    }
    // consume one character
    int scanned = fCurrentEntity.ch[fCurrentEntity.position++];
    // isExternal() is consulted only when a carriage return was read
    boolean externalCR = false;
    if (scanned == '\r') {
        externalCR = fCurrentEntity.isExternal();
    }
    if (scanned == '\n' || externalCR) {
        fCurrentEntity.lineNumber++;
        fCurrentEntity.columnNumber = 1;
        // keep the newline at the front of the buffer while loading more
        if (fCurrentEntity.position == fCurrentEntity.count) {
            fCurrentEntity.ch[0] = (char)scanned;
            load(1, false);
        }
        if (externalCR) {
            // swallow the '\n' of a "\r\n" pair; otherwise step back
            if (fCurrentEntity.ch[fCurrentEntity.position++] != '\n') {
                fCurrentEntity.position--;
            }
            scanned = '\n';
        }
    }
    // the column is advanced for every scanned character
    fCurrentEntity.columnNumber++;
    return scanned;
}
// Adapted from:
// org.apache.xerces.impl.XMLEntityManager.EntityScanner.scanName
/**
* Returns a string matching the Name production appearing immediately
* on the input as a symbol, or null if no Name string is present.
* <p>
* <strong>Note:</strong> The Name characters are consumed.
* <p>
* <strong>Note:</strong> The string returned must be a symbol. The
* SymbolTable can be used for this purpose.
* <p>
* <strong>Note:</strong> When a name fills the whole character buffer,
* the buffer (and fBufferSize) is doubled so scanning can continue.
*
* @return The name interned through fSymbolTable, or null when the next
* character is not a name-start character.
*
* @throws IOException Thrown if i/o error occurs.
* @throws EOFException Thrown on end of file.
*
* @see org.apache.xerces.util.SymbolTable
* @see org.apache.xerces.util.XMLChar#isName
* @see org.apache.xerces.util.XMLChar#isNameStart
*/
public String scanName() throws IOException {
// load more characters, if needed
if (fCurrentEntity.position == fCurrentEntity.count) {
load(0, true);
}
// scan name
int offset = fCurrentEntity.position;
if (XMLChar.isNameStart(fCurrentEntity.ch[offset])) {
// the first character was the last one buffered: move it to the front
// of the buffer and load more after it
if (++fCurrentEntity.position == fCurrentEntity.count) {
fCurrentEntity.ch[0] = fCurrentEntity.ch[offset];
offset = 0;
// a true result from load() ends the name here: it is that single character
if (load(1, false)) {
fCurrentEntity.columnNumber++;
String symbol = fSymbolTable.addSymbol(fCurrentEntity.ch,
0, 1);
return symbol;
}
}
while (XMLChar.isName(fCurrentEntity.ch[fCurrentEntity.position])) {
// buffer exhausted in the middle of the name
if (++fCurrentEntity.position == fCurrentEntity.count) {
int length = fCurrentEntity.position - offset;
if (length == fBufferSize) {
// bad luck we have to resize our buffer
char[] tmp = new char[fBufferSize * 2];
System.arraycopy(fCurrentEntity.ch, offset,
tmp, 0, length);
fCurrentEntity.ch = tmp;
fBufferSize *= 2;
} else {
// slide the partial name to the start of the buffer
System.arraycopy(fCurrentEntity.ch, offset,
fCurrentEntity.ch, 0, length);
}
offset = 0;
// load more characters after the partial name
if (load(length, false)) {
break;
}
}
}
}
int length = fCurrentEntity.position - offset;
fCurrentEntity.columnNumber += length;
// return name
String symbol = null;
if (length > 0) {
symbol = fSymbolTable.addSymbol(fCurrentEntity.ch, offset, length);
}
return symbol;
}
// Adapted from:
// org.apache.xerces.impl.XMLEntityManager.EntityScanner.scanLiteral
/**
* Scans a range of attribute value data, setting the fields of the
* XMLString structure, appropriately.
* <p>
* <strong>Note:</strong> The characters are consumed.
* <p>
* <strong>Note:</strong> This method does not guarantee to return
* the longest run of attribute value data. This method may return
* before the quote character due to reaching the end of the input
* buffer or any other reason.
* <p>
* <strong>Note:</strong> The fields contained in the XMLString
* structure are not guaranteed to remain valid upon subsequent calls
* to the entity scanner. Therefore, the caller is responsible for
* immediately using the returned character data or making a copy of
* the character data.
* <p>
* <strong>Note:</strong> Scanning also stops, without consuming the
* character, at '%' and at any character for which XMLChar.isContent()
* is false.
*
* @param quote The quote character that signifies the end of the
* attribute value data.
* @param content The content structure to fill.
*
* @return Returns the next character on the input, if known. This
* value may be -1 but this does <em>not</em> designate
* end of file.
*
* @throws IOException Thrown if i/o error occurs.
* @throws EOFException Thrown on end of file.
*/
public int scanLiteral(int quote, XMLString content)
throws IOException {
// load more characters, if needed
if (fCurrentEntity.position == fCurrentEntity.count) {
load(0, true);
} else if (fCurrentEntity.position == fCurrentEntity.count - 1) {
// only one character left: move it to the front and load more after it
fCurrentEntity.ch[0] = fCurrentEntity.ch[fCurrentEntity.count - 1];
load(1, false);
fCurrentEntity.position = 0;
}
// normalize newlines
int offset = fCurrentEntity.position;
int c = fCurrentEntity.ch[offset];
int newlines = 0;
boolean external = fCurrentEntity.isExternal();
if (c == '\n' || (c == '\r' && external)) {
// consume the leading run of newline characters
do {
c = fCurrentEntity.ch[fCurrentEntity.position++];
if (c == '\r' && external) {
newlines++;
fCurrentEntity.lineNumber++;
fCurrentEntity.columnNumber = 1;
// buffer exhausted: restart at the front, keeping room for the
// newlines counted so far
if (fCurrentEntity.position == fCurrentEntity.count) {
offset = 0;
fCurrentEntity.position = newlines;
if (load(newlines, false)) {
break;
}
}
// "\r\n" counts as a single newline: skip the '\n' and shift
// the start of the reported range by one
if (fCurrentEntity.ch[fCurrentEntity.position] == '\n') {
fCurrentEntity.position++;
offset++;
}
/*** NEWLINE NORMALIZATION ***/
else {
newlines++;
}
/***/
}
else if (c == '\n') {
newlines++;
fCurrentEntity.lineNumber++;
fCurrentEntity.columnNumber = 1;
if (fCurrentEntity.position == fCurrentEntity.count) {
offset = 0;
fCurrentEntity.position = newlines;
if (load(newlines, false)) {
break;
}
}
/*** NEWLINE NORMALIZATION ***
if (fCurrentEntity.ch[fCurrentEntity.position] == '\r'
&& external) {
fCurrentEntity.position++;
offset++;
}
/***/
}
else {
// not a newline: un-read it and leave the normalization loop
fCurrentEntity.position--;
break;
}
} while (fCurrentEntity.position < fCurrentEntity.count - 1);
// overwrite the consumed run with '\n' characters
for (int i = offset; i < fCurrentEntity.position; i++) {
fCurrentEntity.ch[i] = '\n';
}
int length = fCurrentEntity.position - offset;
// only one character remains buffered: hand back the newlines alone
if (fCurrentEntity.position == fCurrentEntity.count - 1) {
content.setValues(fCurrentEntity.ch, offset, length);
return -1;
}
}
// scan literal value
while (fCurrentEntity.position < fCurrentEntity.count) {
c = fCurrentEntity.ch[fCurrentEntity.position++];
if ((c == quote &&
(!fCurrentEntity.literal || external))
|| c == '%' || !XMLChar.isContent(c)) {
// stop character: leave it unconsumed
fCurrentEntity.position--;
break;
}
}
int length = fCurrentEntity.position - offset;
fCurrentEntity.columnNumber += length - newlines;
content.setValues(fCurrentEntity.ch, offset, length);
// return next character
if (fCurrentEntity.position != fCurrentEntity.count) {
c = fCurrentEntity.ch[fCurrentEntity.position];
// NOTE: We don't want to accidentally signal the
// end of the literal if we're expanding an
// entity appearing in the literal. -Ac
if (c == quote && fCurrentEntity.literal) {
c = -1;
}
}
else {
c = -1;
}
return c;
}
/**
* Scans a range of character data up to the specified delimiter,
* appending the scanned characters to the given XMLStringBuffer.
* <p>
* <strong>Note:</strong> The characters are consumed.
* <p>
* <strong>Note:</strong> This assumes that the internal buffer is
* at least the same size, or bigger, than the length of the delimiter
* and that the delimiter contains at least one character.
* <p>
* <strong>Note:</strong> This method does not guarantee to return
* the longest run of character data. This method may return before
* the delimiter due to reaching the end of the input buffer or any
* other reason.
* <p>
* <strong>Note:</strong> The delimiter itself is consumed but is not
* appended to the buffer.
*
* @param delimiter The string that signifies the end of the character
* data to be scanned.
* @param buffer The string buffer that receives the scanned data.
*
* @return Returns true if there is more data to scan (the method stopped
* after a run of newlines or at an invalid character), false once the
* delimiter has been found or the input ran out.
*
* @throws IOException Thrown if i/o error occurs.
* @throws EOFException Thrown on end of file.
*/
public boolean scanData(String delimiter, XMLStringBuffer buffer)
throws IOException {
boolean done = false;
int delimLen = delimiter.length();
char charAt0 = delimiter.charAt(0);
boolean external = fCurrentEntity.isExternal();
do {
// load more characters, if needed
if (fCurrentEntity.position == fCurrentEntity.count) {
load(0, true);
}
else if (fCurrentEntity.position >= fCurrentEntity.count - delimLen) {
// fewer than delimLen + 1 characters left: slide them to the front
// of the buffer and load more after them
System.arraycopy(fCurrentEntity.ch, fCurrentEntity.position,
fCurrentEntity.ch, 0, fCurrentEntity.count - fCurrentEntity.position);
load(fCurrentEntity.count - fCurrentEntity.position, false);
fCurrentEntity.position = 0;
}
if (fCurrentEntity.position >= fCurrentEntity.count - delimLen) {
// something must be wrong with the input: e.g., file ends an
// unterminated comment
int length = fCurrentEntity.count - fCurrentEntity.position;
buffer.append (fCurrentEntity.ch, fCurrentEntity.position,
length);
// NOTE(review): advances the column by count rather than by length --
// confirm this is intended.
fCurrentEntity.columnNumber += fCurrentEntity.count;
fCurrentEntity.position = fCurrentEntity.count;
load(0,true);
return false;
}
// normalize newlines
int offset = fCurrentEntity.position;
int c = fCurrentEntity.ch[offset];
int newlines = 0;
if (c == '\n' || (c == '\r' && external)) {
// consume the leading run of newline characters
do {
c = fCurrentEntity.ch[fCurrentEntity.position++];
if (c == '\r' && external) {
newlines++;
fCurrentEntity.lineNumber++;
fCurrentEntity.columnNumber = 1;
if (fCurrentEntity.position == fCurrentEntity.count) {
offset = 0;
fCurrentEntity.position = newlines;
if (load(newlines, false)) {
break;
}
}
// "\r\n" counts as a single newline
if (fCurrentEntity.ch[fCurrentEntity.position] == '\n') {
fCurrentEntity.position++;
offset++;
}
/*** NEWLINE NORMALIZATION ***/
else {
newlines++;
}
}
else if (c == '\n') {
newlines++;
fCurrentEntity.lineNumber++;
fCurrentEntity.columnNumber = 1;
if (fCurrentEntity.position == fCurrentEntity.count) {
offset = 0;
fCurrentEntity.position = newlines;
// NOTE(review): count is reset here but not in the '\r' branch
// above -- confirm the asymmetry is intended.
fCurrentEntity.count = newlines;
if (load(newlines, false)) {
break;
}
}
}
else {
// not a newline: un-read it and leave the normalization loop
fCurrentEntity.position--;
break;
}
} while (fCurrentEntity.position < fCurrentEntity.count - 1);
// overwrite the consumed run with '\n' characters
for (int i = offset; i < fCurrentEntity.position; i++) {
fCurrentEntity.ch[i] = '\n';
}
int length = fCurrentEntity.position - offset;
// only one character remains buffered: hand back the newlines alone
if (fCurrentEntity.position == fCurrentEntity.count - 1) {
buffer.append(fCurrentEntity.ch, offset, length);
return true;
}
}
// iterate over buffer looking for delimiter
OUTER: while (fCurrentEntity.position < fCurrentEntity.count) {
c = fCurrentEntity.ch[fCurrentEntity.position++];
if (c == charAt0) {
// looks like we just hit the delimiter
int delimOffset = fCurrentEntity.position - 1;
for (int i = 1; i < delimLen; i++) {
// buffer ended inside a possible delimiter: back up to its start
if (fCurrentEntity.position == fCurrentEntity.count) {
fCurrentEntity.position -= i;
break OUTER;
}
c = fCurrentEntity.ch[fCurrentEntity.position++];
if (delimiter.charAt(i) != c) {
fCurrentEntity.position--;
break;
}
}
// every delimiter character matched
if (fCurrentEntity.position == delimOffset + delimLen) {
done = true;
break;
}
}
else if (c == '\n' || (external && c == '\r')) {
// leave the newline for the normalization pass of the next iteration
fCurrentEntity.position--;
break;
}
else if (XMLChar.isInvalid(c)) {
// stop in front of the invalid character and hand back what precedes it
fCurrentEntity.position--;
int length = fCurrentEntity.position - offset;
fCurrentEntity.columnNumber += length - newlines;
buffer.append(fCurrentEntity.ch, offset, length);
return true;
}
}
int length = fCurrentEntity.position - offset;
fCurrentEntity.columnNumber += length - newlines;
if (done) {
// the delimiter is consumed but not appended
length -= delimLen;
}
buffer.append (fCurrentEntity.ch, offset, length);
// return true if string was skipped
} while (!done);
// the loop above only exits once done is true, so this evaluates to false
return !done;
}
// Adapted from:
// org.apache.xerces.impl.XMLEntityManager.EntityScanner.skipChar
/**
 * Consumes the next character on the input if, and only if, it matches
 * the given character.
 * <p>
 * When a line feed is requested and the current entity is external, a
 * carriage return (optionally followed by a line feed) is accepted as
 * the match as well.
 *
 * @param c The character to skip.
 *
 * @return Returns true if the character was skipped.
 *
 * @throws IOException  Thrown if i/o error occurs.
 * @throws EOFException Thrown on end of file.
 */
public boolean skipChar(int c) throws IOException {
    // refill the buffer once it has been fully consumed
    if (fCurrentEntity.position == fCurrentEntity.count) {
        load(0, true);
    }
    final int next = fCurrentEntity.ch[fCurrentEntity.position];

    if (next != c) {
        // the only other acceptable input is a '\r' standing in for '\n'
        final boolean crForLf =
            c == '\n' && next == '\r' && fCurrentEntity.isExternal();
        if (!crForLf) {
            // character was not skipped
            return false;
        }
        // handle newlines
        if (fCurrentEntity.position == fCurrentEntity.count) {
            fCurrentEntity.ch[0] = (char)next;
            load(1, false);
        }
        fCurrentEntity.position++;
        // swallow the '\n' of a "\r\n" pair
        if (fCurrentEntity.ch[fCurrentEntity.position] == '\n') {
            fCurrentEntity.position++;
        }
        fCurrentEntity.lineNumber++;
        fCurrentEntity.columnNumber = 1;
        return true;
    }

    // exact match: consume it and update the location counters
    fCurrentEntity.position++;
    if (c == '\n') {
        fCurrentEntity.lineNumber++;
        fCurrentEntity.columnNumber = 1;
    }
    else {
        fCurrentEntity.columnNumber++;
    }
    return true;
}
// Adapted from:
// org.apache.xerces.impl.XMLEntityManager.EntityScanner.skipSpaces
/**
 * Skips space characters appearing immediately on the input.
 * <p>
 * <strong>Note:</strong> The characters are consumed only if they are
 * space characters (as decided by <code>XMLChar.isSpace</code>).
 * <p>
 * For every newline that is skipped the line number of the current
 * entity is incremented and its column number is reset to 1; every
 * other skipped space increments the column number. In an external
 * entity a carriage return counts as a newline, and a line feed that
 * directly follows it is consumed together with it.
 *
 * @return Returns true if at least one space character was skipped.
 *
 * @throws IOException Thrown if i/o error occurs.
 * @throws EOFException Thrown on end of file.
 *
 * @see org.apache.xerces.util.XMLChar#isSpace
 */
public boolean skipSpaces() throws IOException {
// load more characters, if needed
if (fCurrentEntity.position == fCurrentEntity.count) {
load(0, true);
}
// skip spaces
int c = fCurrentEntity.ch[fCurrentEntity.position];
if (XMLChar.isSpace(c)) {
boolean external = fCurrentEntity.isExternal();
do {
// set when the load below reports that the end of the entity was hit;
// in that case the position must not be advanced at the bottom of the loop
boolean entityChanged = false;
// handle newlines
if (c == '\n' || (external && c == '\r')) {
fCurrentEntity.lineNumber++;
fCurrentEntity.columnNumber = 1;
// the newline is the last buffered character: keep it in slot 0 and
// read the following characters in behind it so the CR LF check
// below can look one character ahead
if (fCurrentEntity.position == fCurrentEntity.count - 1) {
fCurrentEntity.ch[0] = (char)c;
entityChanged = load(1, true);
if (!entityChanged)
// the load changed the position to be 1,
// need to restore it when the entity has not changed
fCurrentEntity.position = 0;
}
if (c == '\r' && external) {
// REVISIT: Does this need to be updated to fix the
// #x0D ^#x0A newline normalization problem? -Ac
// peek at the character after the CR; step back if it is not a LF
if (fCurrentEntity.ch[++fCurrentEntity.position] != '\n') {
fCurrentEntity.position--;
}
}
/*** NEWLINE NORMALIZATION ***
else {
if (fCurrentEntity.ch[fCurrentEntity.position + 1] == '\r'
&& external) {
fCurrentEntity.position++;
}
}
/***/
}
else {
fCurrentEntity.columnNumber++;
}
// advance past the space just handled, then load more characters, if needed
if (!entityChanged)
fCurrentEntity.position++;
if (fCurrentEntity.position == fCurrentEntity.count) {
load(0, true);
}
} while (XMLChar.isSpace(c = fCurrentEntity.ch[fCurrentEntity.position]));
return true;
}
// no spaces were found
return false;
}
/**
 * Skips the specified string appearing immediately on the input.
 * <p>
 * <strong>Note:</strong> The characters are consumed only if they match
 * the specified string in full; on a mismatch the read position is moved
 * back to where it was when the comparison started.
 * <p>
 * On success the column number of the current entity is advanced by the
 * length of the string; the line number is never touched here.
 *
 * @param s The string to skip.
 *
 * @return Returns true if the string was skipped.
 *
 * @throws IOException Thrown if i/o error occurs.
 * @throws EOFException Thrown on end of file.
 */
public boolean skipString(String s) throws IOException {
// load more characters, if needed
if (fCurrentEntity.position == fCurrentEntity.count) {
load(0, true);
}
// skip string
final int length = s.length();
for (int i = 0; i < length; i++) {
char c = fCurrentEntity.ch[fCurrentEntity.position++];
if (c != s.charAt(i)) {
// mismatch: un-read the i + 1 characters consumed so far
fCurrentEntity.position -= i + 1;
return false;
}
// buffer exhausted in the middle of the string: move the i + 1 characters
// matched so far to the front of the buffer and read more in behind them
if (i < length - 1 && fCurrentEntity.position == fCurrentEntity.count) {
System.arraycopy(fCurrentEntity.ch, fCurrentEntity.count - i - 1, fCurrentEntity.ch, 0, i + 1);
// REVISIT: Can a string to be skipped cross an
// entity boundary? -Ac
if (load(i + 1, false)) {
// the entity ended before the whole string was seen
fCurrentEntity.position -= i + 1;
return false;
}
}
}
fCurrentEntity.columnNumber += length;
return true;
}
// Adapted from:
// org.apache.xerces.impl.XMLEntityManager.EntityScanner.load
/**
 * Loads a chunk of text into the character buffer of the current entity.
 * <p>
 * When characters were read, the entity's count and position are reset
 * so that scanning continues at <code>offset</code>. When the reader
 * reports end of input, count and position are both set to
 * <code>offset</code> and the entity boundary is signaled through the
 * return value.
 *
 * @param offset       The offset into the character buffer to
 *                     read the next batch of characters.
 * @param changeEntity True if the load should change entities
 *                     at the end of the entity, otherwise leave
 *                     the current entity in place and the entity
 *                     boundary will be signaled by the return
 *                     value.
 *
 * @return Returns true if the entity changed as a result of this
 *         load operation.
 *
 * @throws IOException  Thrown if i/o error occurs.
 * @throws EOFException Thrown if no current entity is left after the
 *                      end of the entity has been handled.
 */
final boolean load(int offset, boolean changeEntity)
    throws IOException {

    // read characters; while chunked reads are not allowed yet only a
    // small, fixed amount is requested
    int length = fCurrentEntity.mayReadChunks
        ? (fCurrentEntity.ch.length - offset)
        : DEFAULT_XMLDECL_BUFFER_SIZE;
    int count = fCurrentEntity.reader.read(fCurrentEntity.ch, offset,
                                           length);

    if (count != -1) {
        // a read of zero characters leaves count and position untouched
        if (count != 0) {
            fCurrentEntity.count = count + offset;
            fCurrentEntity.position = offset;
        }
        return false;
    }

    // end of this entity
    fCurrentEntity.count = offset;
    fCurrentEntity.position = offset;
    if (changeEntity) {
        endEntity();
        if (fCurrentEntity == null) {
            throw new EOFException();
        }
        // handle the trailing edges: try exactly one more read on the
        // entity that is now current. This must not pass changeEntity
        // as true again: if that entity is exhausted as well, the call
        // would re-enter this branch and recurse until the stack
        // overflows.
        if (fCurrentEntity.position == fCurrentEntity.count) {
            load(0, false);
        }
    }
    return true;
}
// Adapted from:
// org.apache.xerces.impl.XMLEntityManager.RewindableInputStream
/**
 * This class wraps the byte input streams we're presented with.
 * We need it because java.io.InputStreams don't provide
 * functionality to reread processed bytes, and they have a habit
 * of reading more than one character when you call their read()
 * methods. This means that, once we discover the true (declared)
 * encoding of a document, we can neither backtrack to read the
 * whole doc again nor start reading where we are with a new
 * reader.
 *
 * This class allows rewinding an input stream by allowing a mark
 * to be set, and the stream reset to that position. <strong>The
 * class assumes that it needs to read one byte per invocation
 * when its read() method is invoked; once the current entity allows
 * chunked reads it delegates to the underlying InputStream's
 * read(byte[], int, int) method instead--it won't buffer data read
 * this way!</strong>
 *
 * @author Neil Graham, IBM
 * @author Glenn Marcy, IBM
 */
private final class RewindableInputStream extends InputStream {

    /** The wrapped stream; set to null once this stream is closed. */
    private InputStream fInputStream;
    /** Bytes read one at a time so far; grown on demand. */
    private byte[] fData;
    /** Position that {@link #rewind()} returns to. */
    private int fStartOffset;
    /** Offset at which end of stream was seen, or -1 if not seen yet. */
    private int fEndOffset;
    /** Current read position within the buffered bytes. */
    private int fOffset;
    /** Number of valid bytes in the buffer. */
    private int fLength;
    /** Position remembered by {@link #mark(int)}. */
    private int fMark;

    public RewindableInputStream(InputStream is) {
        fData = new byte[DEFAULT_XMLDECL_BUFFER_SIZE];
        fInputStream = is;
        fStartOffset = 0;
        fEndOffset = -1;
        fOffset = 0;
        fLength = 0;
        fMark = 0;
    }

    /** Sets the position that {@link #rewind()} goes back to. */
    public void setStartOffset(int offset) {
        fStartOffset = offset;
    }

    /** Moves the read position back to the configured start offset. */
    public void rewind() {
        fOffset = fStartOffset;
    }

    /**
     * Reads a single byte, serving it from the buffer when it has been
     * read before and remembering it in the buffer otherwise.
     *
     * @return the byte as a value in the range 0-255, or -1 at end of
     *         stream
     */
    public int read() throws IOException {
        int b = 0;
        if (fOffset < fLength) {
            return fData[fOffset++] & 0xff;
        }
        if (fOffset == fEndOffset) {
            return -1;
        }
        // buffer is full: double its size before storing another byte
        if (fOffset == fData.length) {
            byte[] newData = new byte[fOffset << 1];
            System.arraycopy(fData, 0, newData, 0, fOffset);
            fData = newData;
        }
        b = fInputStream.read();
        if (b == -1) {
            fEndOffset = fOffset;
            return -1;
        }
        fData[fLength++] = (byte)b;
        fOffset++;
        return b & 0xff;
    }

    /**
     * Reads bytes into the given array. Buffered bytes are served first.
     * When the buffer is drained, the request goes straight to the
     * wrapped stream (unbuffered) if the current entity may read chunks;
     * otherwise exactly one byte is read and buffered.
     *
     * @return the number of bytes read, or -1 at end of stream
     */
    public int read(byte[] b, int off, int len) throws IOException {
        int bytesLeft = fLength - fOffset;
        if (bytesLeft == 0) {
            if (fOffset == fEndOffset) {
                return -1;
            }
            // better get some more for the voracious reader...
            if (fCurrentEntity.mayReadChunks) {
                return fInputStream.read(b, off, len);
            }
            int returnedVal = read();
            if (returnedVal == -1) {
                fEndOffset = fOffset;
                return -1;
            }
            b[off] = (byte)returnedVal;
            return 1;
        }
        if (len < bytesLeft) {
            if (len <= 0) {
                return 0;
            }
        }
        else {
            len = bytesLeft;
        }
        if (b != null) {
            System.arraycopy(fData, fOffset, b, off, len);
        }
        fOffset += len;
        return len;
    }

    /**
     * Skips over buffered bytes first and lets the wrapped stream skip
     * whatever remains of the request.
     *
     * @return the number of bytes actually skipped
     */
    public long skip(long n)
        throws IOException
    {
        int bytesLeft;
        if (n <= 0) {
            return 0;
        }
        bytesLeft = fLength - fOffset;
        if (bytesLeft == 0) {
            if (fOffset == fEndOffset) {
                return 0;
            }
            return fInputStream.skip(n);
        }
        if (n <= bytesLeft) {
            fOffset += n;
            return n;
        }
        fOffset += bytesLeft;
        if (fOffset == fEndOffset) {
            return bytesLeft;
        }
        n -= bytesLeft;
        return fInputStream.skip(n) + bytesLeft;
    }

    /**
     * Returns how many bytes can be read without blocking.
     * <p>
     * In a manner of speaking, when this class isn't permitting more
     * than one byte at a time to be read, it is "blocking". The
     * available() method should indicate how much can be read without
     * blocking, so while we're in this mode, it should only indicate
     * that bytes in its buffer are available; otherwise, the result of
     * available() on the underlying InputStream is appropriate.
     *
     * @return the number of buffered bytes still unread; if there are
     *         none, 0 at end of stream or while only single-byte reads
     *         are permitted, else what the wrapped stream reports
     */
    public int available() throws IOException {
        int bytesLeft = fLength - fOffset;
        if (bytesLeft == 0) {
            if (fOffset == fEndOffset) {
                // InputStream.available() is specified to return 0, not
                // -1, once the end of the stream has been reached
                return 0;
            }
            return fCurrentEntity.mayReadChunks ? fInputStream.available()
                                                : 0;
        }
        return bytesLeft;
    }

    /** Remembers the current position; the read limit is ignored. */
    public void mark(int howMuch) {
        fMark = fOffset;
    }

    /** Returns to the position remembered by the last mark. */
    public void reset() {
        fOffset = fMark;
    }

    public boolean markSupported() {
        return true;
    }

    /** Closes the wrapped stream; further calls are no-ops. */
    public void close() throws IOException {
        if (fInputStream != null) {
            fInputStream.close();
            fInputStream = null;
        }
    }
} // end of RewindableInputStream class
// Adapted from:
// org.apache.xerces.impl.XMLDocumentScannerImpl.dispatch
/**
 * Scans what starts with "&lt;?xml" at the current input position, if
 * anything: either a standard XML declaration or a processing
 * instruction whose target merely begins with "xml" (e.g. "xmlfoo").
 * Does nothing when the input does not start with "&lt;?xml".
 */
private void scanXMLDecl() throws IOException, JasperException {
    if (!skipString("<?xml")) {
        return;
    }
    fMarkupDepth++;

    if (!XMLChar.isName(peekChar())) {
        // nothing name-like follows "xml": standard XML declaration
        scanXMLDeclOrTextDecl(false);
        return;
    }

    // NOTE: special case where the document starts with a PI whose
    // name starts with "xml"; collect the remainder of the target name
    fStringBuffer.clear();
    fStringBuffer.append("xml");
    while (XMLChar.isName(peekChar())) {
        fStringBuffer.append((char) scanChar());
    }
    String piTarget = fSymbolTable.addSymbol(fStringBuffer.ch,
                                             fStringBuffer.offset,
                                             fStringBuffer.length);
    scanPIData(piTarget, fString);
}
// Adapted from:
// org.apache.xerces.impl.XMLDocumentFragmentScannerImpl.scanXMLDeclOrTextDecl
/**
 * Scans an XML or text declaration and records the encoding it declares.
 * <p>
 * <pre>
 * [23] XMLDecl ::= '&lt;?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'" )
 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
 * [32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'")
 *                 | ('"' ('yes' | 'no') '"'))
 *
 * [77] TextDecl ::= '&lt;?xml' VersionInfo? EncodingDecl S? '?>'
 * </pre>
 * When an encoding pseudo-attribute is present, its value is stored in
 * <code>encoding</code> and <code>isEncodingSetInProlog</code> is set.
 *
 * @param scanningTextDecl True if a text declaration is to
 *                         be scanned instead of an XML
 *                         declaration.
 */
private void scanXMLDeclOrTextDecl(boolean scanningTextDecl)
    throws IOException, JasperException {

    // the two-argument variant fills fStrings with the version,
    // encoding and standalone values, in that order
    scanXMLDeclOrTextDecl(scanningTextDecl, fStrings);
    fMarkupDepth--;

    final String declaredEncoding = fStrings[1];
    if (declaredEncoding == null) {
        return;
    }
    isEncodingSetInProlog = true;
    encoding = declaredEncoding;
}
// Adapted from:
// org.apache.xerces.impl.XMLScanner.scanXMLDeclOrTextDecl
/**
 * Scans the pseudo-attributes of an XML or text declaration, up to and
 * including the closing "?>", and hands their values back through the
 * given array.
 * <p>
 * <pre>
 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'" )
 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
 * [32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'")
 * | ('"' ('yes' | 'no') '"'))
 *
 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
 * </pre>
 * <p>
 * The pseudo-attributes are processed by a small state machine that
 * expects them in the order version, encoding, standalone. Violations
 * are reported through the error dispatcher.
 *
 * @param scanningTextDecl True if a text declaration is to
 * be scanned instead of an XML
 * declaration.
 * @param pseudoAttributeValues An array of size 3 to return the version,
 * encoding and standalone pseudo attribute values
 * (in that order). An entry is null when the
 * corresponding pseudo-attribute was not seen.
 *
 * <strong>Note:</strong> This method uses fString, anything in it
 * at the time of calling is lost.
 */
private void scanXMLDeclOrTextDecl(boolean scanningTextDecl,
String[] pseudoAttributeValues)
throws IOException, JasperException {
// pseudo-attribute values
// (the local "encoding" is distinct from the field of the same name
// that the one-argument variant of this method assigns)
String version = null;
String encoding = null;
String standalone = null;
// scan pseudo-attributes
// each state names the pseudo-attribute that is expected next
final int STATE_VERSION = 0;
final int STATE_ENCODING = 1;
final int STATE_STANDALONE = 2;
final int STATE_DONE = 3;
int state = STATE_VERSION;
boolean dataFoundForTarget = false;
// whether white space preceded the pseudo-attribute about to be scanned
boolean sawSpace = skipSpaces();
while (peekChar() != '?') {
dataFoundForTarget = true;
// the value of the pseudo-attribute is left in fString
String name = scanPseudoAttribute(scanningTextDecl, fString);
// NOTE(review): names are compared by identity below; presumably scanName()
// returns symbols interned in the same table as fVersionSymbol etc. -- confirm
switch (state) {
case STATE_VERSION: {
if (name == fVersionSymbol) {
if (!sawSpace) {
reportFatalError(scanningTextDecl
? "jsp.error.xml.spaceRequiredBeforeVersionInTextDecl"
: "jsp.error.xml.spaceRequiredBeforeVersionInXMLDecl",
null);
}
version = fString.toString();
state = STATE_ENCODING;
if (!version.equals("1.0")) {
// REVISIT: XML REC says we should throw an error
// in such cases.
// some may object the throwing of fatalError.
err.jspError("jsp.error.xml.versionNotSupported",
version);
}
} else if (name == fEncodingSymbol) {
// encoding without a preceding version is only allowed in a text declaration
if (!scanningTextDecl) {
err.jspError("jsp.error.xml.versionInfoRequired");
}
if (!sawSpace) {
reportFatalError(scanningTextDecl
? "jsp.error.xml.spaceRequiredBeforeEncodingInTextDecl"
: "jsp.error.xml.spaceRequiredBeforeEncodingInXMLDecl",
null);
}
encoding = fString.toString();
// a text declaration has no standalone pseudo-attribute
state = scanningTextDecl ? STATE_DONE : STATE_STANDALONE;
} else {
if (scanningTextDecl) {
err.jspError("jsp.error.xml.encodingDeclRequired");
}
else {
err.jspError("jsp.error.xml.versionInfoRequired");
}
}
break;
}
case STATE_ENCODING: {
if (name == fEncodingSymbol) {
if (!sawSpace) {
reportFatalError(scanningTextDecl
? "jsp.error.xml.spaceRequiredBeforeEncodingInTextDecl"
: "jsp.error.xml.spaceRequiredBeforeEncodingInXMLDecl",
null);
}
encoding = fString.toString();
state = scanningTextDecl ? STATE_DONE : STATE_STANDALONE;
// TODO: check encoding name; set encoding on
// entity scanner
} else if (!scanningTextDecl && name == fStandaloneSymbol) {
// XML declaration without an encoding: standalone directly follows version
if (!sawSpace) {
err.jspError("jsp.error.xml.spaceRequiredBeforeStandalone");
}
standalone = fString.toString();
state = STATE_DONE;
if (!standalone.equals("yes") && !standalone.equals("no")) {
err.jspError("jsp.error.xml.sdDeclInvalid");
}
} else {
err.jspError("jsp.error.xml.encodingDeclRequired");
}
break;
}
case STATE_STANDALONE: {
if (name == fStandaloneSymbol) {
if (!sawSpace) {
err.jspError("jsp.error.xml.spaceRequiredBeforeStandalone");
}
standalone = fString.toString();
state = STATE_DONE;
if (!standalone.equals("yes") && !standalone.equals("no")) {
err.jspError("jsp.error.xml.sdDeclInvalid");
}
} else {
err.jspError("jsp.error.xml.encodingDeclRequired");
}
break;
}
// STATE_DONE: nothing further may follow
default: {
err.jspError("jsp.error.xml.noMorePseudoAttributes");
}
}
sawSpace = skipSpaces();
}
// REVISIT: should we remove this error reporting?
if (scanningTextDecl && state != STATE_DONE) {
err.jspError("jsp.error.xml.morePseudoAttributes");
}
// If there is no data in the xml or text decl then we fail to report
// error for version or encoding info above.
if (scanningTextDecl) {
if (!dataFoundForTarget && encoding == null) {
err.jspError("jsp.error.xml.encodingDeclRequired");
}
} else {
if (!dataFoundForTarget && version == null) {
err.jspError("jsp.error.xml.versionInfoRequired");
}
}
// end: the declaration must be closed by "?>"
if (!skipChar('?')) {
err.jspError("jsp.error.xml.xmlDeclUnterminated");
}
if (!skipChar('>')) {
err.jspError("jsp.error.xml.xmlDeclUnterminated");
}
// fill in return array
pseudoAttributeValues[0] = version;
pseudoAttributeValues[1] = encoding;
pseudoAttributeValues[2] = standalone;
}
// Adapted from:
// org.apache.xerces.impl.XMLScanner.scanPseudoAttribute
/**
 * Scans one pseudo attribute of the form <code>name = "value"</code>
 * (single or double quotes) and stores its value in the given string.
 *
 * @param scanningTextDecl True if scanning this pseudo-attribute for a
 *                         TextDecl; false if scanning XMLDecl. This
 *                         flag is only used to pick the matching
 *                         error message.
 * @param value            The string to fill in with the attribute
 *                         value.
 *
 * @return The name of the attribute
 *
 * <strong>Note:</strong> This method uses fStringBuffer2, anything in it
 * at the time of calling is lost.
 */
public String scanPseudoAttribute(boolean scanningTextDecl,
                                  XMLString value)
    throws IOException, JasperException {

    // name
    final String attrName = scanName();
    if (attrName == null) {
        err.jspError("jsp.error.xml.pseudoAttrNameExpected");
    }

    // '=' with optional white space on either side
    skipSpaces();
    if (!skipChar('=')) {
        String eqKey;
        if (scanningTextDecl) {
            eqKey = "jsp.error.xml.eqRequiredInTextDecl";
        } else {
            eqKey = "jsp.error.xml.eqRequiredInXMLDecl";
        }
        reportFatalError(eqKey, attrName);
    }
    skipSpaces();

    // opening quote
    final int quoteChar = peekChar();
    if (!(quoteChar == '\'' || quoteChar == '"')) {
        String quoteKey;
        if (scanningTextDecl) {
            quoteKey = "jsp.error.xml.quoteRequiredInTextDecl";
        } else {
            quoteKey = "jsp.error.xml.quoteRequiredInXMLDecl";
        }
        reportFatalError(quoteKey, attrName);
    }
    scanChar();

    // value: scanLiteral stops at the closing quote or at a character
    // that needs individual treatment; in the latter case the pieces
    // are stitched together in fStringBuffer2
    int stopChar = scanLiteral(quoteChar, value);
    if (stopChar != quoteChar) {
        fStringBuffer2.clear();
        while (stopChar != quoteChar) {
            fStringBuffer2.append(value);
            if (stopChar != -1) {
                switch (stopChar) {
                    case '&':
                    case '%':
                    case '<':
                    case ']':
                        // taken over literally
                        fStringBuffer2.append((char) scanChar());
                        break;
                    default:
                        if (XMLChar.isHighSurrogate(stopChar)) {
                            scanSurrogates(fStringBuffer2);
                        } else if (XMLChar.isInvalid(stopChar)) {
                            String invalidKey;
                            if (scanningTextDecl) {
                                invalidKey = "jsp.error.xml.invalidCharInTextDecl";
                            } else {
                                invalidKey = "jsp.error.xml.invalidCharInXMLDecl";
                            }
                            reportFatalError(invalidKey,
                                             Integer.toString(stopChar, 16));
                            scanChar();
                        }
                        break;
                }
            }
            stopChar = scanLiteral(quoteChar, value);
        }
        fStringBuffer2.append(value);
        value.setValues(fStringBuffer2);
    }

    // closing quote
    if (!skipChar(quoteChar)) {
        String closeKey;
        if (scanningTextDecl) {
            closeKey = "jsp.error.xml.closeQuoteMissingInTextDecl";
        } else {
            closeKey = "jsp.error.xml.closeQuoteMissingInXMLDecl";
        }
        reportFatalError(closeKey, attrName);
    }

    return attrName;
}
// Adapted from:
// org.apache.xerces.impl.XMLScanner.scanPIData
/**
 * Scans the data of a processing instruction, up to the terminating
 * "?>". This is needed to handle the situation where a document starts
 * with a processing instruction whose target name
 * <em>starts with</em> "xml" (e.g. xmlfoo).
 *
 * <strong>Note:</strong> This method uses fStringBuffer, anything in it
 * at the time of calling is lost.
 *
 * @param target The PI target
 * @param data   The string to fill in with the data
 */
private void scanPIData(String target, XMLString data)
    throws IOException, JasperException {

    // a target spelling "xml" in any letter case is reserved
    if (target.length() == 3
            && Character.toLowerCase(target.charAt(0)) == 'x'
            && Character.toLowerCase(target.charAt(1)) == 'm'
            && Character.toLowerCase(target.charAt(2)) == 'l') {
        err.jspError("jsp.error.xml.reservedPITarget");
    }

    // white space must separate the target from any data
    if (!skipSpaces()) {
        if (skipString("?>")) {
            // we found the end, there is no data
            data.clear();
            return;
        }
        // if there is data there should be some space
        err.jspError("jsp.error.xml.spaceRequiredInPI");
    }

    // data: scanData keeps returning true while the terminator has not
    // been reached; each time, look at the character it stopped on
    fStringBuffer.clear();
    while (scanData("?>", fStringBuffer)) {
        final int pending = peekChar();
        if (pending == -1) {
            continue;
        }
        if (XMLChar.isHighSurrogate(pending)) {
            scanSurrogates(fStringBuffer);
        } else if (XMLChar.isInvalid(pending)) {
            err.jspError("jsp.error.xml.invalidCharInPI",
                         Integer.toHexString(pending));
            scanChar();
        }
    }
    data.setValues(fStringBuffer);
}
// Adapted from:
// org.apache.xerces.impl.XMLScanner.scanSurrogates
/**
 * Scans a surrogate pair and appends both halves to the specified
 * buffer.
 * <p>
 * <strong>Note:</strong> This assumes the current char has already been
 * identified as a high surrogate.
 *
 * @param buf The StringBuffer to append the read surrogates to.
 * @return True if it succeeded; false if the high surrogate is not
 *         followed by a low surrogate or the pair does not form a valid
 *         XML character (an error is reported in both cases).
 */
private boolean scanSurrogates(XMLStringBuffer buf)
    throws IOException, JasperException {

    final int highUnit = scanChar();
    final int lowUnit = peekChar();

    if (XMLChar.isLowSurrogate(lowUnit)) {
        scanChar();

        // combine the pair; the resulting supplemental character must
        // itself be a valid XML character
        final int codePoint =
            XMLChar.supplemental((char) highUnit, (char) lowUnit);
        if (XMLChar.isValid(codePoint)) {
            buf.append((char) highUnit);
            buf.append((char) lowUnit);
            return true;
        }

        err.jspError("jsp.error.xml.invalidCharInContent",
                     Integer.toString(codePoint, 16));
        return false;
    }

    // lone high surrogate
    err.jspError("jsp.error.xml.invalidCharInContent",
                 Integer.toString(highUnit, 16));
    return false;
}
// Adapted from:
// org.apache.xerces.impl.XMLScanner.reportFatalError
/**
 * Convenience function used in all XML scanners: forwards the error to
 * the error dispatcher.
 *
 * @param msgId The key of the error message
 * @param arg   The single argument passed along with the message key;
 *              some callers in this class pass null
 *
 * @throws JasperException declared because the dispatcher call may throw it
 */
private void reportFatalError(String msgId, String arg)
throws JasperException {
err.jspError(msgId, arg);
}
}
--
To unsubscribe, e-mail: <ma...@jakarta.apache.org>
For additional commands, e-mail: <ma...@jakarta.apache.org>
Re: cvs commit: jakarta-tomcat-jasper/jasper2/src/share/org/apache/jasper/xmlparser
ASCIIReader.java UCSReader.java UTF8Reader.java XMLEncodingDetector.java
Posted by Remy Maucherat <re...@apache.org>.
luehe@apache.org wrote:
> luehe 2002/11/06 12:14:20
>
> Modified: jasper2/src/share/org/apache/jasper/compiler
> ErrorDispatcher.java JspReader.java JspUtil.java
> PageDataImpl.java PageInfo.java
> ParserController.java Validator.java
> jasper2/src/share/org/apache/jasper/resources
> messages.properties messages_es.properties
> messages_ja.properties
> Added: jasper2/src/share/org/apache/jasper/xmlparser
> ASCIIReader.java UCSReader.java UTF8Reader.java
> XMLEncodingDetector.java
> Log:
> First cut at I18N changes.
>
There are problems with that patch:
- What does it do ?
- It seems like a big change, was it discussed before on the list ?
- Trying to access the admin webapp:
java.lang.StackOverflowError
at org.apache.jasper.xmlparser.UTF8Reader.read(UTF8Reader.java:293)
at org.apache.jasper.xmlparser.XMLEncodingDetector.load(XMLEncodingDetector.java:1041)
at org.apache.jasper.xmlparser.XMLEncodingDetector.load(XMLEncodingDetector.java:1065)
at org.apache.jasper.xmlparser.XMLEncodingDetector.load(XMLEncodingDetector.java:1065)
(you get the idea)
Thanks,
Remy
--
To unsubscribe, e-mail: <ma...@jakarta.apache.org>
For additional commands, e-mail: <ma...@jakarta.apache.org>