You are viewing a plain text version of this content. The canonical link to the original message was not preserved in this plain-text copy.
Posted to dev@tomcat.apache.org by lu...@apache.org on 2003/02/13 00:44:24 UTC

cvs commit: jakarta-tomcat-jasper/jasper2/src/share/org/apache/jasper/xmlparser XMLEncodingDetector.java XercesEncodingDetector.java

luehe       2003/02/12 15:44:24

  Modified:    jasper2/src/share/org/apache/jasper/compiler Compiler.java
                        PageInfo.java ParserController.java Validator.java
               jasper2/src/share/org/apache/jasper/xmlparser
                        XMLEncodingDetector.java
                        XercesEncodingDetector.java
  Log:
  Last round of encoding-determination changes, to comply with the spec.
  
  Revision  Changes    Path
  1.52      +1 -1      jakarta-tomcat-jasper/jasper2/src/share/org/apache/jasper/compiler/Compiler.java
  
  Index: Compiler.java
  ===================================================================
  RCS file: /home/cvs/jakarta-tomcat-jasper/jasper2/src/share/org/apache/jasper/compiler/Compiler.java,v
  retrieving revision 1.51
  retrieving revision 1.52
  diff -u -r1.51 -r1.52
  --- Compiler.java	12 Feb 2003 02:22:51 -0000	1.51
  +++ Compiler.java	12 Feb 2003 23:44:22 -0000	1.52
  @@ -210,7 +210,7 @@
   	    pageInfo.setELIgnoredSpecified(true);
   	}
   	pageInfo.setIsXml(JspUtil.booleanValue(jspProperty.isXml()));
  -	pageInfo.setPageEncoding(jspProperty.getPageEncoding());
  +	pageInfo.setConfigEncoding(jspProperty.getPageEncoding());
   	pageInfo.setELIgnored(JspUtil.booleanValue(jspProperty.isELIgnored()));
   	pageInfo.setScriptingInvalid(JspUtil.booleanValue(jspProperty.isScriptingInvalid()));
   	if (jspProperty.getIncludePrelude() != null) {
  
  
  
  1.19      +30 -3     jakarta-tomcat-jasper/jasper2/src/share/org/apache/jasper/compiler/PageInfo.java
  
  Index: PageInfo.java
  ===================================================================
  RCS file: /home/cvs/jakarta-tomcat-jasper/jasper2/src/share/org/apache/jasper/compiler/PageInfo.java,v
  retrieving revision 1.18
  retrieving revision 1.19
  diff -u -r1.18 -r1.19
  --- PageInfo.java	12 Feb 2003 02:22:51 -0000	1.18
  +++ PageInfo.java	12 Feb 2003 23:44:22 -0000	1.19
  @@ -89,6 +89,17 @@
       private String errorPage = null;
       private String pageEncoding = null;
   
  +    // Encoding specified in JSP config element
  +    private String configEncoding;
  +
  +    /*
  +     * Indicates whether an encoding has been explicitly specified in the
  +     * page's XML prolog (only used for pages in XML syntax).
  +     * This information is used to decide whether a translation error must
  +     * be reported for encoding conflicts.
  +     */
  +    private boolean isEncodingSpecifiedInProlog;
  +
       private int maxTagNesting = 0;
       private boolean scriptless = false;
       private boolean scriptingInvalid = false;
  @@ -228,6 +239,22 @@
   
       public String getPageEncoding() {
   	return pageEncoding;
  +    }
  +
  +    public void setIsEncodingSpecifiedInProlog(boolean isSpecified) {
  +	this.isEncodingSpecifiedInProlog = isSpecified;
  +    }
  +
  +    public boolean isEncodingSpecifiedInProlog() {
  +	return this.isEncodingSpecifiedInProlog;
  +    }
  +
  +    public void setConfigEncoding(String enc) {
  +	this.configEncoding = enc;
  +    }
  +
  +    public String getConfigEncoding() {
  +	return this.configEncoding;
       }
   
       public int getMaxTagNesting() {
  
  
  
  1.32      +35 -27    jakarta-tomcat-jasper/jasper2/src/share/org/apache/jasper/compiler/ParserController.java
  
  Index: ParserController.java
  ===================================================================
  RCS file: /home/cvs/jakarta-tomcat-jasper/jasper2/src/share/org/apache/jasper/compiler/ParserController.java,v
  retrieving revision 1.31
  retrieving revision 1.32
  diff -u -r1.31 -r1.32
  --- ParserController.java	27 Jan 2003 23:15:33 -0000	1.31
  +++ ParserController.java	12 Feb 2003 23:44:23 -0000	1.32
  @@ -199,27 +199,23 @@
   	figureOutJspDocument(absFileName, jarFile);
   
   	if (isTopFile) {
  -	    if (isXml) {
  -		// Make sure the encoding determined from the XML prolog
  -		// matches that in the JSP config element, if present.
  -		// Treat "UTF-16", "UTF-16BE", and "UTF-16LE" as identical.
  -		String jspConfigPageEnc = pageInfo.getPageEncoding();
  +	    if (isXml && pageInfo.isEncodingSpecifiedInProlog()) {
  +		/*
  +		 * Make sure the encoding explicitly specified in the XML
  +		 * prolog (if any) matches that in the JSP config element
  +		 * (if any), treating "UTF-16", "UTF-16BE", and "UTF-16LE" as
  +		 * identical.
  +		 */
  +		String jspConfigPageEnc = pageInfo.getConfigEncoding();
   		if (jspConfigPageEnc != null
   		        && !jspConfigPageEnc.equals(sourceEnc)
  -			&& (!jspConfigPageEnc.startsWith("UTF-16")
  +		        && (!jspConfigPageEnc.startsWith("UTF-16")
   			    || !sourceEnc.startsWith("UTF-16"))) {
   		    err.jspError("jsp.error.prolog_config_encoding_mismatch",
   				 sourceEnc, jspConfigPageEnc);
   		}
  -		// override the encoding that may have been set from JSP config
  -		// info (in Compiler.generateJava()), since that applies to
  -		// standard syntax only
  -		pageInfo.setPageEncoding(sourceEnc);
  -	    } else {
  -		if (pageInfo.getPageEncoding() == null) {
  -		    pageInfo.setPageEncoding(sourceEnc);
  -		}
   	    }
  +	    pageInfo.setPageEncoding(sourceEnc);
   	    pageInfo.setIsXml(isXml);
   	    isTopFile = false;
   	} else {
  @@ -305,12 +301,20 @@
       private void figureOutJspDocument(String fname, JarFile jarFile)
   	        throws JasperException, IOException {
   
  -	// 'true' if the syntax of the page (XML or standard) is identified by
  -	// external information: either via a JSP configuration element or
  -	// the ".jspx" suffix
  +	/*
  +	 * 'true' if the syntax of the page (XML or standard) is identified by
  +	 * external information: either via a JSP configuration element or
  +	 * the ".jspx" suffix
  +	 */
   	boolean isExternal = false;
   	isXml = false;
   
  +	/*
  +	 * Indicates whether we need to revert from temporary usage of
  +	 * "ISO-8859-1" back to "UTF-8"
  +	 */
  +	boolean revert = false;
  +
   	if (pageInfo.isXmlSpecified()) {
   	    // If <is-xml> is specified in a <jsp-property-group>, it is used.
   	    isXml = pageInfo.isXml();
  @@ -321,8 +325,8 @@
   	}
   	
   	if (isExternal && !isXml) {
  -	    // JSP syntax
  -	    if (pageInfo.getPageEncoding() != null) {
  +	    // JSP (standard) syntax
  +	    if (pageInfo.getConfigEncoding() != null) {
   		// Encoding specified in jsp-config (used by standard syntax
   		// only)
   		sourceEnc = pageInfo.getPageEncoding();
  @@ -332,18 +336,19 @@
   		sourceEnc = "ISO-8859-1";
   	    }
   	} else {
  -	    // XML syntax or unknown, autodetect encoding ...
  +	    // XML syntax or unknown, (auto)detect encoding ...
   	    Object[] ret = XMLEncodingDetector.getEncoding(fname, jarFile,
   							   ctxt, err);
   	    sourceEnc = (String) ret[0];
  -	    boolean isFallback = ((Boolean) ret[1]).booleanValue();
  -	    if (isFallback) {
  +	    if (((Boolean) ret[1]).booleanValue()) {
  +		pageInfo.setIsEncodingSpecifiedInProlog(true);
  +	    }
  +
  +	    if (!isXml && sourceEnc.equals("UTF-8")) {
   		/*
  -		 * Page does not have any XML prolog, or contains an XML
  -		 * prolog that is being used as template text (in standard
  -		 * syntax). This means that the page's encoding cannot be
  -		 * determined from the 'encoding' attribute of an XML prolog,
  -		 * or autodetected from an XML prolog.
  +		 * We don't know if we're dealing with XML or standard syntax.
  +		 * Therefore, we need to check and see if the page contains
  +		 * a <jsp:root> element.
   		 *
   		 * We need to be careful, because the page may be encoded in
   		 * ISO-8859-1 (or something entirely different), and may
  @@ -357,6 +362,7 @@
   		 * and ISO-8859-1 are extensions of ASCII).
   		 */
   		sourceEnc = "ISO-8859-1";
  +		revert = true;
   	    }
   	}
   
  @@ -389,6 +395,8 @@
   	    Mark mark = jspReader.skipUntil(JSP_ROOT_TAG);
   	    if (mark != null) {
   	        isXml = true;
  +		if (revert) 
  +		    sourceEnc = "UTF-8";
   		return;
   	    } else {
   	        isXml = false;
  
  
  
  1.75      +36 -30    jakarta-tomcat-jasper/jasper2/src/share/org/apache/jasper/compiler/Validator.java
  
  Index: Validator.java
  ===================================================================
  RCS file: /home/cvs/jakarta-tomcat-jasper/jasper2/src/share/org/apache/jasper/compiler/Validator.java,v
  retrieving revision 1.74
  retrieving revision 1.75
  diff -u -r1.74 -r1.75
  --- Validator.java	10 Feb 2003 17:07:08 -0000	1.74
  +++ Validator.java	12 Feb 2003 23:44:23 -0000	1.75
  @@ -252,34 +252,10 @@
   			err.jspError(n, "jsp.error.page.multiple.pageencoding");
   		    pageEncodingSeen = true;
   		    /*
  -		     * It is a translation-time error to name different page
  -		     * character encodings in two or more of the following:
  -		     * the XML prolog of a JSP page, the pageEncoding
  -		     * attribute of the page directive of the JSP page, and in
  -		     * a JSP configuration element (whose URL pattern matches
  -		     * the page).
  -		     *
  -		     * At this point, we've already verified (in 
  -		     * ParserController.parse()) that the page character
  -		     * encodings specified in a JSP config element and XML
  -		     * prolog match.
  -		     *
  -		     * Treat "UTF-16", "UTF-16BE", and "UTF-16LE" as identical.
  +		     * Report any encoding conflict, treating "UTF-16",
  +		     * "UTF-16BE", and "UTF-16LE" as identical.
   		     */
  -		    String compareEnc = pageInfo.getPageEncoding();
  -		    if (!value.equals(compareEnc) 
  -			    && (!value.startsWith("UTF-16")
  -				|| !compareEnc.startsWith("UTF-16"))) {
  -			if (pageInfo.isXml()) {
  -			    err.jspError(n,
  -					 "jsp.error.prolog_pagedir_encoding_mismatch",
  -					 compareEnc, value);
  -			} else {
  -			    err.jspError(n,
  -					 "jsp.error.config_pagedir_encoding_mismatch",
  -					 compareEnc, value);
  -			}
  -		    }
  +		    compareEncodings(value, n, pageInfo);
   		}
   	    }
   
  @@ -344,6 +320,36 @@
   	    // Do nothing, since this variable directive has already been
   	    // validated by TagFileProcessor when it created a TagInfo object
   	    // from the tag file in which the directive appeared
  +	}
  +
  +	/*
  +	 * Compares the encoding specified in the 'pageEncoding' attribute of
  +	 * the page directive with the encoding explicitly specified in the
  +	 * XML prolog (only for XML syntax) and the encoding specified in
  +	 * the JSP config element whose URL pattern matches the page, and 
  +	 * throws an error in case of a mismatch.
  +	 */
  +	private void compareEncodings(String pageDirEnc, Node n,
  +				      PageInfo pageInfo)
  +	            throws JasperException {
  +
  +	    String configEnc = pageInfo.getConfigEncoding();
  +	    if (configEnc != null && !pageDirEnc.equals(configEnc) 
  +		    && (!pageDirEnc.startsWith("UTF-16")
  +			|| !configEnc.startsWith("UTF-16"))) {
  +		err.jspError(n, "jsp.error.config_pagedir_encoding_mismatch",
  +			     configEnc, pageDirEnc);
  +	    }
  +
  +	    if (pageInfo.isXml() && pageInfo.isEncodingSpecifiedInProlog()) {
  +		String pageEnc = pageInfo.getPageEncoding();
  +		if (!pageDirEnc.equals(pageEnc) 
  +		        && (!pageDirEnc.startsWith("UTF-16")
  +			    || !pageEnc.startsWith("UTF-16"))) {
  +		    err.jspError(n, "jsp.error.prolog_pagedir_encoding_mismatch",
  +				 pageEnc, pageDirEnc);
  +		}
  +	    }
   	}
       }
   
  
  
  
  1.5       +1 -1      jakarta-tomcat-jasper/jasper2/src/share/org/apache/jasper/xmlparser/XMLEncodingDetector.java
  
  Index: XMLEncodingDetector.java
  ===================================================================
  RCS file: /home/cvs/jakarta-tomcat-jasper/jasper2/src/share/org/apache/jasper/xmlparser/XMLEncodingDetector.java,v
  retrieving revision 1.4
  retrieving revision 1.5
  diff -u -r1.4 -r1.5
  --- XMLEncodingDetector.java	27 Jan 2003 18:10:48 -0000	1.4
  +++ XMLEncodingDetector.java	12 Feb 2003 23:44:23 -0000	1.5
  @@ -104,7 +104,7 @@
   				      ErrorDispatcher err)
   	throws IOException, JasperException
       {
  -        Object result[] = new Object[] { "UTF8", new Boolean(true) };
  +        Object result[] = new Object[] { "UTF8", new Boolean(false) };
           return result;
       }
   }
  
  
  
  1.5       +7 -11     jakarta-tomcat-jasper/jasper2/src/share/org/apache/jasper/xmlparser/XercesEncodingDetector.java
  
  Index: XercesEncodingDetector.java
  ===================================================================
  RCS file: /home/cvs/jakarta-tomcat-jasper/jasper2/src/share/org/apache/jasper/xmlparser/XercesEncodingDetector.java,v
  retrieving revision 1.4
  retrieving revision 1.5
  diff -u -r1.4 -r1.5
  --- XercesEncodingDetector.java	27 Jan 2003 18:10:48 -0000	1.4
  +++ XercesEncodingDetector.java	12 Feb 2003 23:44:23 -0000	1.5
  @@ -80,7 +80,7 @@
       
       private InputStream stream;
       private String encoding;
  -    private boolean isFallback;
  +    private boolean isEncodingSetInProlog;
       private Boolean isBigEndian;
       private Reader reader;
       
  @@ -134,10 +134,10 @@
        * @param err The error dispatcher
        *
        * @return Two-element array, where the first element (of type
  -     * java.lang.String) contains the name of the (auto)detected encoding, 
  -     * and the second element specifies whether the default encoding
  -     * (UTF-8) is being used as a fallback (because no encoding could be
  -     * detected).
  +     * java.lang.String) contains the name of the (auto)detected encoding, and
  +     * the second element (of type java.lang.Boolean) specifies whether the 
  +     * encoding was specified using the 'encoding' attribute of an XML prolog
  +     * (TRUE) or autodetected (FALSE).
        */
       public Object[] getEncoding(InputStream in, ErrorDispatcher err)
   	throws IOException, JasperException
  @@ -149,7 +149,7 @@
   	detector.scanXMLDecl();
   	
   	return new Object[] { detector.encoding,
  -			      new Boolean(detector.isFallback) };
  +			      new Boolean(detector.isEncodingSetInProlog) };
       }
   
       public Object[] getEncodingMethod(String fname, JarFile jarFile,
  @@ -319,7 +319,6 @@
       private Object[] getEncodingName(byte[] b4, int count) {
   
           if (count < 2) {
  -	    isFallback = true;
               return new Object[]{"UTF-8", null};
           }
   
  @@ -338,7 +337,6 @@
           // default to UTF-8 if we don't have enough bytes to make a
           // good determination of the encoding
           if (count < 3) {
  -	    isFallback = true;
               return new Object [] {"UTF-8", null};
           }
   
  @@ -351,7 +349,6 @@
           // default to UTF-8 if we don't have enough bytes to make a
           // good determination of the encoding
           if (count < 4) {
  -	    isFallback = true;
               return new Object [] {"UTF-8", null};
           }
   
  @@ -393,7 +390,6 @@
           }
   
           // default encoding
  -	isFallback = true;
           return new Object [] {"UTF-8", null};
   
       }
  @@ -1306,7 +1302,7 @@
   
           // set encoding on reader
           if (encodingPseudoAttr != null) {
  -            isFallback = false;
  +            isEncodingSetInProlog = true;
   	    encoding = encodingPseudoAttr;
           }
       }
  
  
  

---------------------------------------------------------------------
To unsubscribe, e-mail: tomcat-dev-unsubscribe@jakarta.apache.org
For additional commands, e-mail: tomcat-dev-help@jakarta.apache.org