You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@tomcat.apache.org by ma...@apache.org on 2016/12/15 21:34:06 UTC
svn commit: r1774526 - in /tomcat/trunk/java/org/apache/jasper:
compiler/EncodingDetector.java security/SecurityClassLoad.java
Author: markt
Date: Thu Dec 15 21:34:06 2016
New Revision: 1774526
URL: http://svn.apache.org/viewvc?rev=1774526&view=rev
Log:
Add a new encoding detector implementation.
The BoM encoding detection is based on the previous code.
The prolog encoding detection delegates to the JRE's XML parser rather than the custom Jasper parser.
Added:
tomcat/trunk/java/org/apache/jasper/compiler/EncodingDetector.java (with props)
Modified:
tomcat/trunk/java/org/apache/jasper/security/SecurityClassLoad.java
Added: tomcat/trunk/java/org/apache/jasper/compiler/EncodingDetector.java
URL: http://svn.apache.org/viewvc/tomcat/trunk/java/org/apache/jasper/compiler/EncodingDetector.java?rev=1774526&view=auto
==============================================================================
--- tomcat/trunk/java/org/apache/jasper/compiler/EncodingDetector.java (added)
+++ tomcat/trunk/java/org/apache/jasper/compiler/EncodingDetector.java Thu Dec 15 21:34:06 2016
@@ -0,0 +1,214 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jasper.compiler;
+
+import java.io.BufferedInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+
+import javax.xml.stream.XMLInputFactory;
+import javax.xml.stream.XMLStreamException;
+import javax.xml.stream.XMLStreamReader;
+
+/*
+ * The BoM detection is derived from:
+ * http://svn.us.apache.org/viewvc/tomcat/trunk/java/org/apache/jasper/xmlparser/XMLEncodingDetector.java?annotate=1742248
+ */
+class EncodingDetector {
+
+ private static final XMLInputFactory XML_INPUT_FACTORY;
+ static {
+ XML_INPUT_FACTORY = XMLInputFactory.newFactory();
+ }
+
+ private final BomResult bomResult;
+ private final String prologEncoding;
+
+
+ /*
+ * TODO: Refactor Jasper InputStream creation and handling so the
+ * InputStream passed to this method is buffered and therefore saves
+ * on multiple opening and re-opening of the same file.
+ */
+ EncodingDetector(InputStream is) throws IOException {
+ // Keep buffer size to a minimum here. BoM will be no more than 4 bytes
+ // so that is the maximum we need to buffer
+ BufferedInputStream bis = new BufferedInputStream(is, 4);
+ bis.mark(4);
+
+ bomResult = processBom(bis);
+
+ // Reset the stream back to the start to allow the XML prolog detection
+ // to work. Skip any BoM we discovered.
+ bis.reset();
+ if (bomResult != null) {
+ for (int i = 0; i < bomResult.skip; i++) {
+ is.read();
+ }
+ }
+
+ prologEncoding = getPrologEncoding(bis);
+ }
+
+
+ String getBomEncoding() {
+ return bomResult.encoding;
+ }
+
+
+ Boolean getBigEndian() {
+ return bomResult.bigEndian;
+ }
+
+
+ int getSkip() {
+ return bomResult.skip;
+ }
+
+
+ String getPrologEncoding() {
+ return prologEncoding;
+ }
+
+
+ private String getPrologEncoding(InputStream stream) {
+ String encoding = null;
+ try {
+ XMLStreamReader xmlStreamReader = XML_INPUT_FACTORY.createXMLStreamReader(stream);
+ encoding = xmlStreamReader.getCharacterEncodingScheme();
+ } catch (XMLStreamException e) {
+ // Ignore
+ }
+ return encoding;
+ }
+
+
+ private BomResult processBom(InputStream stream) {
+ // Read first four bytes (or as many are available) and determine
+ // encoding
+ try {
+ final byte[] b4 = new byte[4];
+ int count = 0;
+ int singleByteRead;
+ while (count < 4) {
+ singleByteRead = stream.read();
+ if (singleByteRead == -1) {
+ break;
+ }
+ b4[count] = (byte) singleByteRead;
+ count++;
+ }
+
+ return parseBom(b4, count);
+ } catch (IOException ioe) {
+ // Failed.
+ return new BomResult("UTF-8", null, 0);
+ }
+ }
+
+
+ private BomResult parseBom(byte[] b4, int count) {
+
+ if (count < 2) {
+ return new BomResult("UTF-8", null, 0);
+ }
+
+ // UTF-16, with BOM
+ int b0 = b4[0] & 0xFF;
+ int b1 = b4[1] & 0xFF;
+ if (b0 == 0xFE && b1 == 0xFF) {
+ // UTF-16, big-endian
+ return new BomResult("UTF-16BE", Boolean.TRUE, 2);
+ }
+ if (b0 == 0xFF && b1 == 0xFE) {
+ // UTF-16, little-endian
+ return new BomResult("UTF-16LE", Boolean.FALSE, 2);
+ }
+
+ // default to UTF-8 if we don't have enough bytes to make a
+ // good determination of the encoding
+ if (count < 3) {
+ return new BomResult("UTF-8", null, 0);
+ }
+
+ // UTF-8 with a BOM
+ int b2 = b4[2] & 0xFF;
+ if (b0 == 0xEF && b1 == 0xBB && b2 == 0xBF) {
+ return new BomResult("UTF-8", null, 3);
+ }
+
+ // default to UTF-8 if we don't have enough bytes to make a
+ // good determination of the encoding
+ if (count < 4) {
+ return new BomResult("UTF-8", null, 0);
+ }
+
+ // other encodings
+ int b3 = b4[3] & 0xFF;
+ if (b0 == 0x00 && b1 == 0x00 && b2 == 0x00 && b3 == 0x3C) {
+ // UCS-4, big endian (1234)
+ return new BomResult("ISO-10646-UCS-4", Boolean.TRUE, 4);
+ }
+ if (b0 == 0x3C && b1 == 0x00 && b2 == 0x00 && b3 == 0x00) {
+ // UCS-4, little endian (4321)
+ return new BomResult("ISO-10646-UCS-4", Boolean.FALSE, 4);
+ }
+ if (b0 == 0x00 && b1 == 0x00 && b2 == 0x3C && b3 == 0x00) {
+ // UCS-4, unusual octet order (2143)
+ // REVISIT: What should this be?
+ return new BomResult("ISO-10646-UCS-4", null, 4);
+ }
+ if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x00) {
+ // UCS-4, unusual octect order (3412)
+ // REVISIT: What should this be?
+ return new BomResult("ISO-10646-UCS-4", null, 4);
+ }
+ if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x3F) {
+ // UTF-16, big-endian, no BOM
+ // (or could turn out to be UCS-2...
+ // REVISIT: What should this be?
+ return new BomResult("UTF-16BE", Boolean.TRUE, 4);
+ }
+ if (b0 == 0x3C && b1 == 0x00 && b2 == 0x3F && b3 == 0x00) {
+ // UTF-16, little-endian, no BOM
+ // (or could turn out to be UCS-2...
+ return new BomResult("UTF-16LE", Boolean.FALSE, 4);
+ }
+ if (b0 == 0x4C && b1 == 0x6F && b2 == 0xA7 && b3 == 0x94) {
+ // EBCDIC
+ // a la xerces1, return CP037 instead of EBCDIC here
+ return new BomResult("CP037", null, 4);
+ }
+
+ // default encoding
+ return new BomResult("UTF-8", null, 0);
+ }
+
+
+ private static class BomResult {
+
+ public final String encoding;
+ public final Boolean bigEndian;
+ public final int skip;
+
+ public BomResult(String encoding, Boolean bigEndian, int skip) {
+ this.encoding = encoding;
+ this.bigEndian = bigEndian;
+ this.skip = skip;
+ }
+ }
+}
Propchange: tomcat/trunk/java/org/apache/jasper/compiler/EncodingDetector.java
------------------------------------------------------------------------------
svn:eol-style = native
Modified: tomcat/trunk/java/org/apache/jasper/security/SecurityClassLoad.java
URL: http://svn.apache.org/viewvc/tomcat/trunk/java/org/apache/jasper/security/SecurityClassLoad.java?rev=1774526&r1=1774525&r2=1774526&view=diff
==============================================================================
--- tomcat/trunk/java/org/apache/jasper/security/SecurityClassLoad.java (original)
+++ tomcat/trunk/java/org/apache/jasper/security/SecurityClassLoad.java Thu Dec 15 21:34:06 2016
@@ -39,6 +39,10 @@ public final class SecurityClassLoad {
final String basePackage = "org.apache.jasper.";
try {
+ // Ensure XMLInputFactory is loaded with Tomcat's class loader
+ loader.loadClass( basePackage +
+ "comppiler.EncodingDetector");
+
loader.loadClass( basePackage +
"runtime.JspFactoryImpl$PrivilegedGetPageContext");
loader.loadClass( basePackage +
---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscribe@tomcat.apache.org
For additional commands, e-mail: dev-help@tomcat.apache.org
Re: svn commit: r1774526 - in /tomcat/trunk/java/org/apache/jasper:
compiler/EncodingDetector.java security/SecurityClassLoad.java
Posted by Mark Thomas <ma...@apache.org>.
On 15/12/2016 21:48, Martin Grigorov wrote:
> Hi Mark,
>
> On Thu, Dec 15, 2016 at 10:34 PM, <ma...@apache.org> wrote:
>
>> Author: markt
>> Date: Thu Dec 15 21:34:06 2016
>> New Revision: 1774526
>>
>> URL: http://svn.apache.org/viewvc?rev=1774526&view=rev
>> Log:
>> Add a new encoding detector implementation.
>> The BoM encoding detection is based on the previous code.
>> The prolog encoding detection delegates to the JRE's XML parser rather
>> than the custom Jasper parser.
<snip/>
>> Modified: tomcat/trunk/java/org/apache/jasper/security/
>> SecurityClassLoad.java
>> URL: http://svn.apache.org/viewvc/tomcat/trunk/java/org/apache/
>> jasper/security/SecurityClassLoad.java?rev=1774526&r1=1774525&r2=1774526&
>> view=diff
>> ============================================================
>> ==================
>> --- tomcat/trunk/java/org/apache/jasper/security/SecurityClassLoad.java
>> (original)
>> +++ tomcat/trunk/java/org/apache/jasper/security/SecurityClassLoad.java
>> Thu Dec 15 21:34:06 2016
>> @@ -39,6 +39,10 @@ public final class SecurityClassLoad {
>>
>> final String basePackage = "org.apache.jasper.";
>> try {
>> + // Ensure XMLInputFactory is loaded with Tomcat's class loader
>> + loader.loadClass( basePackage +
>> + "comppiler.EncodingDetector");
>>
>
> There is one 'p' too many in "comppiler.EncodingDetector".
Thanks. Fixed.
Mark
---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscribe@tomcat.apache.org
For additional commands, e-mail: dev-help@tomcat.apache.org
Re: svn commit: r1774526 - in /tomcat/trunk/java/org/apache/jasper:
compiler/EncodingDetector.java security/SecurityClassLoad.java
Posted by Martin Grigorov <mg...@apache.org>.
Hi Mark,
On Thu, Dec 15, 2016 at 10:34 PM, <ma...@apache.org> wrote:
> Author: markt
> Date: Thu Dec 15 21:34:06 2016
> New Revision: 1774526
>
> URL: http://svn.apache.org/viewvc?rev=1774526&view=rev
> Log:
> Add a new encoding detector implementation.
> The BoM encoding detection is based on the previous code.
> The prolog encoding detection delegates to the JRE's XML parser rather
> than the custom Jasper parser.
>
> Added:
> tomcat/trunk/java/org/apache/jasper/compiler/EncodingDetector.java
> (with props)
> Modified:
> tomcat/trunk/java/org/apache/jasper/security/SecurityClassLoad.java
>
> Added: tomcat/trunk/java/org/apache/jasper/compiler/EncodingDetector.java
> URL: http://svn.apache.org/viewvc/tomcat/trunk/java/org/apache/
> jasper/compiler/EncodingDetector.java?rev=1774526&view=auto
> ============================================================
> ==================
> --- tomcat/trunk/java/org/apache/jasper/compiler/EncodingDetector.java
> (added)
> +++ tomcat/trunk/java/org/apache/jasper/compiler/EncodingDetector.java
> Thu Dec 15 21:34:06 2016
> @@ -0,0 +1,214 @@
> +/*
> + * Licensed to the Apache Software Foundation (ASF) under one or more
> + * contributor license agreements. See the NOTICE file distributed with
> + * this work for additional information regarding copyright ownership.
> + * The ASF licenses this file to You under the Apache License, Version 2.0
> + * (the "License"); you may not use this file except in compliance with
> + * the License. You may obtain a copy of the License at
> + *
> + * http://www.apache.org/licenses/LICENSE-2.0
> + *
> + * Unless required by applicable law or agreed to in writing, software
> + * distributed under the License is distributed on an "AS IS" BASIS,
> + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
> implied.
> + * See the License for the specific language governing permissions and
> + * limitations under the License.
> + */
> +package org.apache.jasper.compiler;
> +
> +import java.io.BufferedInputStream;
> +import java.io.IOException;
> +import java.io.InputStream;
> +
> +import javax.xml.stream.XMLInputFactory;
> +import javax.xml.stream.XMLStreamException;
> +import javax.xml.stream.XMLStreamReader;
> +
> +/*
> + * The BoM detection is derived from:
> + * http://svn.us.apache.org/viewvc/tomcat/trunk/java/org/
> apache/jasper/xmlparser/XMLEncodingDetector.java?annotate=1742248
> + */
> +class EncodingDetector {
> +
> + private static final XMLInputFactory XML_INPUT_FACTORY;
> + static {
> + XML_INPUT_FACTORY = XMLInputFactory.newFactory();
> + }
> +
> + private final BomResult bomResult;
> + private final String prologEncoding;
> +
> +
> + /*
> + * TODO: Refactor Jasper InputStream creation and handling so the
> + * InputStream passed to this method is buffered and therefore
> saves
> + * on multiple opening and re-opening of the same file.
> + */
> + EncodingDetector(InputStream is) throws IOException {
> + // Keep buffer size to a minimum here. BoM will be no more than 4
> bytes
> + // so that is the maximum we need to buffer
> + BufferedInputStream bis = new BufferedInputStream(is, 4);
> + bis.mark(4);
> +
> + bomResult = processBom(bis);
> +
> + // Reset the stream back to the start to allow the XML prolog
> detection
> + // to work. Skip any BoM we discovered.
> + bis.reset();
> + if (bomResult != null) {
> + for (int i = 0; i < bomResult.skip; i++) {
> + is.read();
> + }
> + }
> +
> + prologEncoding = getPrologEncoding(bis);
> + }
> +
> +
> + String getBomEncoding() {
> + return bomResult.encoding;
> + }
> +
> +
> + Boolean getBigEndian() {
> + return bomResult.bigEndian;
> + }
> +
> +
> + int getSkip() {
> + return bomResult.skip;
> + }
> +
> +
> + String getPrologEncoding() {
> + return prologEncoding;
> + }
> +
> +
> + private String getPrologEncoding(InputStream stream) {
> + String encoding = null;
> + try {
> + XMLStreamReader xmlStreamReader = XML_INPUT_FACTORY.
> createXMLStreamReader(stream);
> + encoding = xmlStreamReader.getCharacterEncodingScheme();
> + } catch (XMLStreamException e) {
> + // Ignore
> + }
> + return encoding;
> + }
> +
> +
> + private BomResult processBom(InputStream stream) {
> + // Read first four bytes (or as many are available) and determine
> + // encoding
> + try {
> + final byte[] b4 = new byte[4];
> + int count = 0;
> + int singleByteRead;
> + while (count < 4) {
> + singleByteRead = stream.read();
> + if (singleByteRead == -1) {
> + break;
> + }
> + b4[count] = (byte) singleByteRead;
> + count++;
> + }
> +
> + return parseBom(b4, count);
> + } catch (IOException ioe) {
> + // Failed.
> + return new BomResult("UTF-8", null, 0);
> + }
> + }
> +
> +
> + private BomResult parseBom(byte[] b4, int count) {
> +
> + if (count < 2) {
> + return new BomResult("UTF-8", null, 0);
> + }
> +
> + // UTF-16, with BOM
> + int b0 = b4[0] & 0xFF;
> + int b1 = b4[1] & 0xFF;
> + if (b0 == 0xFE && b1 == 0xFF) {
> + // UTF-16, big-endian
> + return new BomResult("UTF-16BE", Boolean.TRUE, 2);
> + }
> + if (b0 == 0xFF && b1 == 0xFE) {
> + // UTF-16, little-endian
> + return new BomResult("UTF-16LE", Boolean.FALSE, 2);
> + }
> +
> + // default to UTF-8 if we don't have enough bytes to make a
> + // good determination of the encoding
> + if (count < 3) {
> + return new BomResult("UTF-8", null, 0);
> + }
> +
> + // UTF-8 with a BOM
> + int b2 = b4[2] & 0xFF;
> + if (b0 == 0xEF && b1 == 0xBB && b2 == 0xBF) {
> + return new BomResult("UTF-8", null, 3);
> + }
> +
> + // default to UTF-8 if we don't have enough bytes to make a
> + // good determination of the encoding
> + if (count < 4) {
> + return new BomResult("UTF-8", null, 0);
> + }
> +
> + // other encodings
> + int b3 = b4[3] & 0xFF;
> + if (b0 == 0x00 && b1 == 0x00 && b2 == 0x00 && b3 == 0x3C) {
> + // UCS-4, big endian (1234)
> + return new BomResult("ISO-10646-UCS-4", Boolean.TRUE, 4);
> + }
> + if (b0 == 0x3C && b1 == 0x00 && b2 == 0x00 && b3 == 0x00) {
> + // UCS-4, little endian (4321)
> + return new BomResult("ISO-10646-UCS-4", Boolean.FALSE, 4);
> + }
> + if (b0 == 0x00 && b1 == 0x00 && b2 == 0x3C && b3 == 0x00) {
> + // UCS-4, unusual octet order (2143)
> + // REVISIT: What should this be?
> + return new BomResult("ISO-10646-UCS-4", null, 4);
> + }
> + if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x00) {
> + // UCS-4, unusual octect order (3412)
> + // REVISIT: What should this be?
> + return new BomResult("ISO-10646-UCS-4", null, 4);
> + }
> + if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x3F) {
> + // UTF-16, big-endian, no BOM
> + // (or could turn out to be UCS-2...
> + // REVISIT: What should this be?
> + return new BomResult("UTF-16BE", Boolean.TRUE, 4);
> + }
> + if (b0 == 0x3C && b1 == 0x00 && b2 == 0x3F && b3 == 0x00) {
> + // UTF-16, little-endian, no BOM
> + // (or could turn out to be UCS-2...
> + return new BomResult("UTF-16LE", Boolean.FALSE, 4);
> + }
> + if (b0 == 0x4C && b1 == 0x6F && b2 == 0xA7 && b3 == 0x94) {
> + // EBCDIC
> + // a la xerces1, return CP037 instead of EBCDIC here
> + return new BomResult("CP037", null, 4);
> + }
> +
> + // default encoding
> + return new BomResult("UTF-8", null, 0);
> + }
> +
> +
> + private static class BomResult {
> +
> + public final String encoding;
> + public final Boolean bigEndian;
> + public final int skip;
> +
> + public BomResult(String encoding, Boolean bigEndian, int skip) {
> + this.encoding = encoding;
> + this.bigEndian = bigEndian;
> + this.skip = skip;
> + }
> + }
> +}
>
> Propchange: tomcat/trunk/java/org/apache/jasper/compiler/
> EncodingDetector.java
> ------------------------------------------------------------
> ------------------
> svn:eol-style = native
>
> Modified: tomcat/trunk/java/org/apache/jasper/security/
> SecurityClassLoad.java
> URL: http://svn.apache.org/viewvc/tomcat/trunk/java/org/apache/
> jasper/security/SecurityClassLoad.java?rev=1774526&r1=1774525&r2=1774526&
> view=diff
> ============================================================
> ==================
> --- tomcat/trunk/java/org/apache/jasper/security/SecurityClassLoad.java
> (original)
> +++ tomcat/trunk/java/org/apache/jasper/security/SecurityClassLoad.java
> Thu Dec 15 21:34:06 2016
> @@ -39,6 +39,10 @@ public final class SecurityClassLoad {
>
> final String basePackage = "org.apache.jasper.";
> try {
> + // Ensure XMLInputFactory is loaded with Tomcat's class loader
> + loader.loadClass( basePackage +
> + "comppiler.EncodingDetector");
>
There is one 'p' too many in "comppiler.EncodingDetector".
> +
> loader.loadClass( basePackage +
> "runtime.JspFactoryImpl$PrivilegedGetPageContext");
> loader.loadClass( basePackage +
>
>
>
> ---------------------------------------------------------------------
> To unsubscribe, e-mail: dev-unsubscribe@tomcat.apache.org
> For additional commands, e-mail: dev-help@tomcat.apache.org
>
>