You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@tomcat.apache.org by lu...@apache.org on 2002/11/07 01:50:52 UTC

cvs commit: jakarta-tomcat-jasper/jasper2/src/share/org/apache/jasper/compiler ParserController.java

luehe       2002/11/06 16:50:52

  Modified:    jasper2/src/share/org/apache/jasper/compiler
                        ParserController.java
  Log:
  If autodetection yields UTF-8 as the source encoding, use ISO-8859-1
  in the absence of an XML prolog.
  
  Revision  Changes    Path
  1.25      +18 -0     jakarta-tomcat-jasper/jasper2/src/share/org/apache/jasper/compiler/ParserController.java
  
  Index: ParserController.java
  ===================================================================
  RCS file: /home/cvs/jakarta-tomcat-jasper/jasper2/src/share/org/apache/jasper/compiler/ParserController.java,v
  retrieving revision 1.24
  retrieving revision 1.25
  diff -u -r1.24 -r1.25
  --- ParserController.java	6 Nov 2002 20:14:19 -0000	1.24
  +++ ParserController.java	7 Nov 2002 00:50:52 -0000	1.25
  @@ -280,6 +280,24 @@
   	    if (isEncodingSetInProlog) {
   		// Prolog present only in XML syntax
   		isXml = true;
  +	    } else if (sourceEnc.equals("UTF-8")) {
  +		/*
  +		 * We can only be sure we're dealing with an XML document if
  +		 * isXml is true. Even then, we don't know whether the JSP
  +		 * document satisfies the encoding auto-detection rules: it
  +		 * may omit the XML prolog and start directly with
  +		 * <jsp:root ...>.
  +		 * We need to be careful, because the page may be encoded in
  +		 * ISO-8859-1 (or something entirely different), and may
  +		 * contain byte sequences that will cause a UTF-8 converter to
  +		 * throw exceptions. 
  +		 * It is safe to use a source encoding of ISO-8859-1 in this
  +		 * case, as there are no invalid byte sequences in ISO-8859-1,
  +		 * and the byte/character sequences we're looking for are
  +		 * identical in either encoding (both UTF-8 and ISO-8859-1 are
  +		 * extensions of ASCII).
  +		 */
  +		sourceEnc = "ISO-8859-1";
   	    }
   	}
   
  
  
  

--
To unsubscribe, e-mail:   <ma...@jakarta.apache.org>
For additional commands, e-mail: <ma...@jakarta.apache.org>