You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@tomcat.apache.org by ma...@apache.org on 2016/12/15 21:34:06 UTC

svn commit: r1774526 - in /tomcat/trunk/java/org/apache/jasper: compiler/EncodingDetector.java security/SecurityClassLoad.java

Author: markt
Date: Thu Dec 15 21:34:06 2016
New Revision: 1774526

URL: http://svn.apache.org/viewvc?rev=1774526&view=rev
Log:
Add a new encoding detector implementation.
The BoM encoding detection is based on the previous code.
The prolog encoding detection delegates to the JRE's XML parser rather than the custom Jasper parser.

Added:
    tomcat/trunk/java/org/apache/jasper/compiler/EncodingDetector.java   (with props)
Modified:
    tomcat/trunk/java/org/apache/jasper/security/SecurityClassLoad.java

Added: tomcat/trunk/java/org/apache/jasper/compiler/EncodingDetector.java
URL: http://svn.apache.org/viewvc/tomcat/trunk/java/org/apache/jasper/compiler/EncodingDetector.java?rev=1774526&view=auto
==============================================================================
--- tomcat/trunk/java/org/apache/jasper/compiler/EncodingDetector.java (added)
+++ tomcat/trunk/java/org/apache/jasper/compiler/EncodingDetector.java Thu Dec 15 21:34:06 2016
@@ -0,0 +1,214 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jasper.compiler;
+
+import java.io.BufferedInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+
+import javax.xml.stream.XMLInputFactory;
+import javax.xml.stream.XMLStreamException;
+import javax.xml.stream.XMLStreamReader;
+
+/*
+ * The BoM detection is derived from:
+ * http://svn.us.apache.org/viewvc/tomcat/trunk/java/org/apache/jasper/xmlparser/XMLEncodingDetector.java?annotate=1742248
+ */
+class EncodingDetector {
+
+    private static final XMLInputFactory XML_INPUT_FACTORY;
+    static {
+        XML_INPUT_FACTORY = XMLInputFactory.newFactory();
+    }
+
+    private final BomResult bomResult;
+    private final String prologEncoding;
+
+
+    /*
+     * TODO: Refactor Jasper InputStream creation and handling so the
+     *       InputStream passed to this method is buffered and therefore saves
+     *       on multiple opening and re-opening of the same file.
+     */
+    EncodingDetector(InputStream is) throws IOException {
+        // Keep buffer size to a minimum here. BoM will be no more than 4 bytes
+        // so that is the maximum we need to buffer
+        BufferedInputStream bis = new BufferedInputStream(is, 4);
+        bis.mark(4);
+
+        bomResult = processBom(bis);
+
+        // Reset the stream back to the start to allow the XML prolog detection
+        // to work. Skip any BoM we discovered.
+        bis.reset();
+        if (bomResult != null) {
+            for (int i = 0; i < bomResult.skip; i++) {
+                is.read();
+            }
+        }
+
+        prologEncoding = getPrologEncoding(bis);
+    }
+
+
+    String getBomEncoding() {
+        return bomResult.encoding;
+    }
+
+
+    Boolean getBigEndian() {
+        return bomResult.bigEndian;
+    }
+
+
+    int getSkip() {
+        return bomResult.skip;
+    }
+
+
+    String getPrologEncoding() {
+        return prologEncoding;
+    }
+
+
+    private String getPrologEncoding(InputStream stream) {
+        String encoding = null;
+        try {
+            XMLStreamReader xmlStreamReader = XML_INPUT_FACTORY.createXMLStreamReader(stream);
+            encoding = xmlStreamReader.getCharacterEncodingScheme();
+        } catch (XMLStreamException e) {
+            // Ignore
+        }
+        return encoding;
+    }
+
+
+    private BomResult processBom(InputStream stream) {
+        // Read first four bytes (or as many are available) and determine
+        // encoding
+        try {
+            final byte[] b4 = new byte[4];
+            int count = 0;
+            int singleByteRead;
+            while (count < 4) {
+                singleByteRead = stream.read();
+                if (singleByteRead == -1) {
+                    break;
+                }
+                b4[count] = (byte) singleByteRead;
+                count++;
+            }
+
+            return parseBom(b4, count);
+        } catch (IOException ioe) {
+            // Failed.
+            return new BomResult("UTF-8", null,  0);
+        }
+    }
+
+
+    private BomResult parseBom(byte[] b4, int count) {
+
+        if (count < 2) {
+            return new BomResult("UTF-8", null,  0);
+        }
+
+        // UTF-16, with BOM
+        int b0 = b4[0] & 0xFF;
+        int b1 = b4[1] & 0xFF;
+        if (b0 == 0xFE && b1 == 0xFF) {
+            // UTF-16, big-endian
+            return new BomResult("UTF-16BE", Boolean.TRUE, 2);
+        }
+        if (b0 == 0xFF && b1 == 0xFE) {
+            // UTF-16, little-endian
+            return new BomResult("UTF-16LE", Boolean.FALSE, 2);
+        }
+
+        // default to UTF-8 if we don't have enough bytes to make a
+        // good determination of the encoding
+        if (count < 3) {
+            return new BomResult("UTF-8", null,  0);
+        }
+
+        // UTF-8 with a BOM
+        int b2 = b4[2] & 0xFF;
+        if (b0 == 0xEF && b1 == 0xBB && b2 == 0xBF) {
+            return new BomResult("UTF-8", null, 3);
+        }
+
+        // default to UTF-8 if we don't have enough bytes to make a
+        // good determination of the encoding
+        if (count < 4) {
+            return new BomResult("UTF-8", null,  0);
+        }
+
+        // other encodings
+        int b3 = b4[3] & 0xFF;
+        if (b0 == 0x00 && b1 == 0x00 && b2 == 0x00 && b3 == 0x3C) {
+            // UCS-4, big endian (1234)
+            return new BomResult("ISO-10646-UCS-4", Boolean.TRUE, 4);
+        }
+        if (b0 == 0x3C && b1 == 0x00 && b2 == 0x00 && b3 == 0x00) {
+            // UCS-4, little endian (4321)
+            return new BomResult("ISO-10646-UCS-4", Boolean.FALSE, 4);
+        }
+        if (b0 == 0x00 && b1 == 0x00 && b2 == 0x3C && b3 == 0x00) {
+            // UCS-4, unusual octet order (2143)
+            // REVISIT: What should this be?
+            return new BomResult("ISO-10646-UCS-4", null, 4);
+        }
+        if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x00) {
+            // UCS-4, unusual octect order (3412)
+            // REVISIT: What should this be?
+            return new BomResult("ISO-10646-UCS-4", null, 4);
+        }
+        if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x3F) {
+            // UTF-16, big-endian, no BOM
+            // (or could turn out to be UCS-2...
+            // REVISIT: What should this be?
+            return new BomResult("UTF-16BE", Boolean.TRUE, 4);
+        }
+        if (b0 == 0x3C && b1 == 0x00 && b2 == 0x3F && b3 == 0x00) {
+            // UTF-16, little-endian, no BOM
+            // (or could turn out to be UCS-2...
+            return new BomResult("UTF-16LE", Boolean.FALSE, 4);
+        }
+        if (b0 == 0x4C && b1 == 0x6F && b2 == 0xA7 && b3 == 0x94) {
+            // EBCDIC
+            // a la xerces1, return CP037 instead of EBCDIC here
+            return new BomResult("CP037", null, 4);
+        }
+
+        // default encoding
+        return new BomResult("UTF-8", null,  0);
+    }
+
+
+    private static class BomResult {
+
+        public final String encoding;
+        public final Boolean bigEndian;
+        public final int skip;
+
+        public BomResult(String encoding,  Boolean bigEndian, int skip) {
+            this.encoding = encoding;
+            this.bigEndian = bigEndian;
+            this.skip = skip;
+        }
+    }
+}

Propchange: tomcat/trunk/java/org/apache/jasper/compiler/EncodingDetector.java
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: tomcat/trunk/java/org/apache/jasper/security/SecurityClassLoad.java
URL: http://svn.apache.org/viewvc/tomcat/trunk/java/org/apache/jasper/security/SecurityClassLoad.java?rev=1774526&r1=1774525&r2=1774526&view=diff
==============================================================================
--- tomcat/trunk/java/org/apache/jasper/security/SecurityClassLoad.java (original)
+++ tomcat/trunk/java/org/apache/jasper/security/SecurityClassLoad.java Thu Dec 15 21:34:06 2016
@@ -39,6 +39,10 @@ public final class SecurityClassLoad {
 
         final String basePackage = "org.apache.jasper.";
         try {
+            // Ensure XMLInputFactory is loaded with Tomcat's class loader
+            loader.loadClass( basePackage +
+                    "comppiler.EncodingDetector");
+
             loader.loadClass( basePackage +
                 "runtime.JspFactoryImpl$PrivilegedGetPageContext");
             loader.loadClass( basePackage +



---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscribe@tomcat.apache.org
For additional commands, e-mail: dev-help@tomcat.apache.org


Re: svn commit: r1774526 - in /tomcat/trunk/java/org/apache/jasper: compiler/EncodingDetector.java security/SecurityClassLoad.java

Posted by Mark Thomas <ma...@apache.org>.
On 15/12/2016 21:48, Martin Grigorov wrote:
> Hi Mark,
> 
> On Thu, Dec 15, 2016 at 10:34 PM, <ma...@apache.org> wrote:
> 
>> Author: markt
>> Date: Thu Dec 15 21:34:06 2016
>> New Revision: 1774526
>>
>> URL: http://svn.apache.org/viewvc?rev=1774526&view=rev
>> Log:
>> Add a new encoding detector implementation.
>> The BoM encoding detection is based on the previous code.
>> The prolog encoding detection delegates to the JRE's XML parser rather
>> than the custom Jasper parser.

<snip/>

>> Modified: tomcat/trunk/java/org/apache/jasper/security/
>> SecurityClassLoad.java
>> URL: http://svn.apache.org/viewvc/tomcat/trunk/java/org/apache/
>> jasper/security/SecurityClassLoad.java?rev=1774526&r1=1774525&r2=1774526&
>> view=diff
>> ============================================================
>> ==================
>> --- tomcat/trunk/java/org/apache/jasper/security/SecurityClassLoad.java
>> (original)
>> +++ tomcat/trunk/java/org/apache/jasper/security/SecurityClassLoad.java
>> Thu Dec 15 21:34:06 2016
>> @@ -39,6 +39,10 @@ public final class SecurityClassLoad {
>>
>>          final String basePackage = "org.apache.jasper.";
>>          try {
>> +            // Ensure XMLInputFactory is loaded with Tomcat's class loader
>> +            loader.loadClass( basePackage +
>> +                    "comppiler.EncodingDetector");
>>
> 
> There is one 'p' too many in "comppiler.EncodingDetector".

Thanks. Fixed.

Mark


---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscribe@tomcat.apache.org
For additional commands, e-mail: dev-help@tomcat.apache.org


Re: svn commit: r1774526 - in /tomcat/trunk/java/org/apache/jasper: compiler/EncodingDetector.java security/SecurityClassLoad.java

Posted by Martin Grigorov <mg...@apache.org>.
Hi Mark,

On Thu, Dec 15, 2016 at 10:34 PM, <ma...@apache.org> wrote:

> Author: markt
> Date: Thu Dec 15 21:34:06 2016
> New Revision: 1774526
>
> URL: http://svn.apache.org/viewvc?rev=1774526&view=rev
> Log:
> Add a new encoding detector implementation.
> The BoM encoding detection is based on the previous code.
> The prolog encoding detection delegates to the JRE's XML parser rather
> than the custom Jasper parser.
>
> Added:
>     tomcat/trunk/java/org/apache/jasper/compiler/EncodingDetector.java
>  (with props)
> Modified:
>     tomcat/trunk/java/org/apache/jasper/security/SecurityClassLoad.java
>
> Added: tomcat/trunk/java/org/apache/jasper/compiler/EncodingDetector.java
> URL: http://svn.apache.org/viewvc/tomcat/trunk/java/org/apache/
> jasper/compiler/EncodingDetector.java?rev=1774526&view=auto
> ============================================================
> ==================
> --- tomcat/trunk/java/org/apache/jasper/compiler/EncodingDetector.java
> (added)
> +++ tomcat/trunk/java/org/apache/jasper/compiler/EncodingDetector.java
> Thu Dec 15 21:34:06 2016
> @@ -0,0 +1,214 @@
> +/*
> + * Licensed to the Apache Software Foundation (ASF) under one or more
> + * contributor license agreements.  See the NOTICE file distributed with
> + * this work for additional information regarding copyright ownership.
> + * The ASF licenses this file to You under the Apache License, Version 2.0
> + * (the "License"); you may not use this file except in compliance with
> + * the License.  You may obtain a copy of the License at
> + *
> + *      http://www.apache.org/licenses/LICENSE-2.0
> + *
> + * Unless required by applicable law or agreed to in writing, software
> + * distributed under the License is distributed on an "AS IS" BASIS,
> + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
> implied.
> + * See the License for the specific language governing permissions and
> + * limitations under the License.
> + */
> +package org.apache.jasper.compiler;
> +
> +import java.io.BufferedInputStream;
> +import java.io.IOException;
> +import java.io.InputStream;
> +
> +import javax.xml.stream.XMLInputFactory;
> +import javax.xml.stream.XMLStreamException;
> +import javax.xml.stream.XMLStreamReader;
> +
> +/*
> + * The BoM detection is derived from:
> + * http://svn.us.apache.org/viewvc/tomcat/trunk/java/org/
> apache/jasper/xmlparser/XMLEncodingDetector.java?annotate=1742248
> + */
> +class EncodingDetector {
> +
> +    private static final XMLInputFactory XML_INPUT_FACTORY;
> +    static {
> +        XML_INPUT_FACTORY = XMLInputFactory.newFactory();
> +    }
> +
> +    private final BomResult bomResult;
> +    private final String prologEncoding;
> +
> +
> +    /*
> +     * TODO: Refactor Jasper InputStream creation and handling so the
> +     *       InputStream passed to this method is buffered and therefore
> saves
> +     *       on multiple opening and re-opening of the same file.
> +     */
> +    EncodingDetector(InputStream is) throws IOException {
> +        // Keep buffer size to a minimum here. BoM will be no more than 4
> bytes
> +        // so that is the maximum we need to buffer
> +        BufferedInputStream bis = new BufferedInputStream(is, 4);
> +        bis.mark(4);
> +
> +        bomResult = processBom(bis);
> +
> +        // Reset the stream back to the start to allow the XML prolog
> detection
> +        // to work. Skip any BoM we discovered.
> +        bis.reset();
> +        if (bomResult != null) {
> +            for (int i = 0; i < bomResult.skip; i++) {
> +                is.read();
> +            }
> +        }
> +
> +        prologEncoding = getPrologEncoding(bis);
> +    }
> +
> +
> +    String getBomEncoding() {
> +        return bomResult.encoding;
> +    }
> +
> +
> +    Boolean getBigEndian() {
> +        return bomResult.bigEndian;
> +    }
> +
> +
> +    int getSkip() {
> +        return bomResult.skip;
> +    }
> +
> +
> +    String getPrologEncoding() {
> +        return prologEncoding;
> +    }
> +
> +
> +    private String getPrologEncoding(InputStream stream) {
> +        String encoding = null;
> +        try {
> +            XMLStreamReader xmlStreamReader = XML_INPUT_FACTORY.
> createXMLStreamReader(stream);
> +            encoding = xmlStreamReader.getCharacterEncodingScheme();
> +        } catch (XMLStreamException e) {
> +            // Ignore
> +        }
> +        return encoding;
> +    }
> +
> +
> +    private BomResult processBom(InputStream stream) {
> +        // Read first four bytes (or as many are available) and determine
> +        // encoding
> +        try {
> +            final byte[] b4 = new byte[4];
> +            int count = 0;
> +            int singleByteRead;
> +            while (count < 4) {
> +                singleByteRead = stream.read();
> +                if (singleByteRead == -1) {
> +                    break;
> +                }
> +                b4[count] = (byte) singleByteRead;
> +                count++;
> +            }
> +
> +            return parseBom(b4, count);
> +        } catch (IOException ioe) {
> +            // Failed.
> +            return new BomResult("UTF-8", null,  0);
> +        }
> +    }
> +
> +
> +    private BomResult parseBom(byte[] b4, int count) {
> +
> +        if (count < 2) {
> +            return new BomResult("UTF-8", null,  0);
> +        }
> +
> +        // UTF-16, with BOM
> +        int b0 = b4[0] & 0xFF;
> +        int b1 = b4[1] & 0xFF;
> +        if (b0 == 0xFE && b1 == 0xFF) {
> +            // UTF-16, big-endian
> +            return new BomResult("UTF-16BE", Boolean.TRUE, 2);
> +        }
> +        if (b0 == 0xFF && b1 == 0xFE) {
> +            // UTF-16, little-endian
> +            return new BomResult("UTF-16LE", Boolean.FALSE, 2);
> +        }
> +
> +        // default to UTF-8 if we don't have enough bytes to make a
> +        // good determination of the encoding
> +        if (count < 3) {
> +            return new BomResult("UTF-8", null,  0);
> +        }
> +
> +        // UTF-8 with a BOM
> +        int b2 = b4[2] & 0xFF;
> +        if (b0 == 0xEF && b1 == 0xBB && b2 == 0xBF) {
> +            return new BomResult("UTF-8", null, 3);
> +        }
> +
> +        // default to UTF-8 if we don't have enough bytes to make a
> +        // good determination of the encoding
> +        if (count < 4) {
> +            return new BomResult("UTF-8", null,  0);
> +        }
> +
> +        // other encodings
> +        int b3 = b4[3] & 0xFF;
> +        if (b0 == 0x00 && b1 == 0x00 && b2 == 0x00 && b3 == 0x3C) {
> +            // UCS-4, big endian (1234)
> +            return new BomResult("ISO-10646-UCS-4", Boolean.TRUE, 4);
> +        }
> +        if (b0 == 0x3C && b1 == 0x00 && b2 == 0x00 && b3 == 0x00) {
> +            // UCS-4, little endian (4321)
> +            return new BomResult("ISO-10646-UCS-4", Boolean.FALSE, 4);
> +        }
> +        if (b0 == 0x00 && b1 == 0x00 && b2 == 0x3C && b3 == 0x00) {
> +            // UCS-4, unusual octet order (2143)
> +            // REVISIT: What should this be?
> +            return new BomResult("ISO-10646-UCS-4", null, 4);
> +        }
> +        if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x00) {
> +            // UCS-4, unusual octect order (3412)
> +            // REVISIT: What should this be?
> +            return new BomResult("ISO-10646-UCS-4", null, 4);
> +        }
> +        if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x3F) {
> +            // UTF-16, big-endian, no BOM
> +            // (or could turn out to be UCS-2...
> +            // REVISIT: What should this be?
> +            return new BomResult("UTF-16BE", Boolean.TRUE, 4);
> +        }
> +        if (b0 == 0x3C && b1 == 0x00 && b2 == 0x3F && b3 == 0x00) {
> +            // UTF-16, little-endian, no BOM
> +            // (or could turn out to be UCS-2...
> +            return new BomResult("UTF-16LE", Boolean.FALSE, 4);
> +        }
> +        if (b0 == 0x4C && b1 == 0x6F && b2 == 0xA7 && b3 == 0x94) {
> +            // EBCDIC
> +            // a la xerces1, return CP037 instead of EBCDIC here
> +            return new BomResult("CP037", null, 4);
> +        }
> +
> +        // default encoding
> +        return new BomResult("UTF-8", null,  0);
> +    }
> +
> +
> +    private static class BomResult {
> +
> +        public final String encoding;
> +        public final Boolean bigEndian;
> +        public final int skip;
> +
> +        public BomResult(String encoding,  Boolean bigEndian, int skip) {
> +            this.encoding = encoding;
> +            this.bigEndian = bigEndian;
> +            this.skip = skip;
> +        }
> +    }
> +}
>
> Propchange: tomcat/trunk/java/org/apache/jasper/compiler/
> EncodingDetector.java
> ------------------------------------------------------------
> ------------------
>     svn:eol-style = native
>
> Modified: tomcat/trunk/java/org/apache/jasper/security/
> SecurityClassLoad.java
> URL: http://svn.apache.org/viewvc/tomcat/trunk/java/org/apache/
> jasper/security/SecurityClassLoad.java?rev=1774526&r1=1774525&r2=1774526&
> view=diff
> ============================================================
> ==================
> --- tomcat/trunk/java/org/apache/jasper/security/SecurityClassLoad.java
> (original)
> +++ tomcat/trunk/java/org/apache/jasper/security/SecurityClassLoad.java
> Thu Dec 15 21:34:06 2016
> @@ -39,6 +39,10 @@ public final class SecurityClassLoad {
>
>          final String basePackage = "org.apache.jasper.";
>          try {
> +            // Ensure XMLInputFactory is loaded with Tomcat's class loader
> +            loader.loadClass( basePackage +
> +                    "comppiler.EncodingDetector");
>

There is one 'p' too many in "comppiler.EncodingDetector".


> +
>              loader.loadClass( basePackage +
>                  "runtime.JspFactoryImpl$PrivilegedGetPageContext");
>              loader.loadClass( basePackage +
>
>
>
> ---------------------------------------------------------------------
> To unsubscribe, e-mail: dev-unsubscribe@tomcat.apache.org
> For additional commands, e-mail: dev-help@tomcat.apache.org
>
>