You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tomee.apache.org by jl...@apache.org on 2021/04/16 21:43:58 UTC

[tomee-jakarta] branch master updated: Add support for not stripping out the BOM - patch to be created and submitted to Tomcat

This is an automated email from the ASF dual-hosted git repository.

jlmonteiro pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/tomee-jakarta.git


The following commit(s) were added to refs/heads/master by this push:
     new 8803f8c  Add support for not stripping out the BOM - patch to be created and submitted to Tomcat
8803f8c is described below

commit 8803f8c27c181e3aa1543574c8d533410de90889
Author: Jean-Louis Monteiro <jl...@tomitribe.com>
AuthorDate: Fri Apr 16 15:29:55 2021 +0200

    Add support for not stripping out the BOM - patch to be created and submitted to Tomcat
    
    Signed-off-by: Jean-Louis Monteiro <jl...@tomitribe.com>
---
 .../apache/catalina/servlets/DefaultServlet.java   | 63 +++++++++++++++-------
 1 file changed, 44 insertions(+), 19 deletions(-)

diff --git a/transform/src/patch/java/org/apache/catalina/servlets/DefaultServlet.java b/tomee/apache-tomee/src/patch/java/org/apache/catalina/servlets/DefaultServlet.java
similarity index 97%
rename from transform/src/patch/java/org/apache/catalina/servlets/DefaultServlet.java
rename to tomee/apache-tomee/src/patch/java/org/apache/catalina/servlets/DefaultServlet.java
index 3acf5e8..d822c3f 100644
--- a/transform/src/patch/java/org/apache/catalina/servlets/DefaultServlet.java
+++ b/tomee/apache-tomee/src/patch/java/org/apache/catalina/servlets/DefaultServlet.java
@@ -150,7 +150,7 @@ public class DefaultServlet extends HttpServlet {
     /**
      * Full range marker.
      */
-    protected static final Ranges FULL = new Ranges(null, new ArrayList<>());
+    protected static final Ranges FULL = new Ranges(null, new ArrayList<Ranges.Entry>());
 
     private static final ContentRange IGNORE = new ContentRange(null, 0, 0, 0);
 
@@ -246,8 +246,17 @@ public class DefaultServlet extends HttpServlet {
 
     /**
      * If a file has a BOM, should that be used in preference to fileEncoding?
+     *
+     * - true  - BoM is stripped if present and any BoM found used to determine
+     *            the encoding used to read the resource. This is the default.
+     *
+     * - false - BoM is stripped and resource is read using the configured file
+     *            encoding (which will be the platform default if not explicitly
+     *            configured)
+     *
+     * - pass-through - as for false, but does not strip the BoM from the output
      */
-    private boolean useBomIfPresent = true;
+    private String useBomIfPresent = "true";
 
     /**
      * Minimum size for sendfile usage in bytes.
@@ -335,8 +344,15 @@ public class DefaultServlet extends HttpServlet {
             }
         }
 
-        if (getServletConfig().getInitParameter("useBomIfPresent") != null) {
-            useBomIfPresent = Boolean.parseBoolean(getServletConfig().getInitParameter("useBomIfPresent"));
+        final String useBomIfPresentConfig = getServletConfig().getInitParameter("useBomIfPresent");
+        if (useBomIfPresentConfig != null) {
+            if (!Arrays.asList("true", "false", "pass-through").contains(useBomIfPresentConfig)) {
+                if (debug > 0) {
+                    log("DefaultServlet.init:  unsupported value " + useBomIfPresentConfig + " for useBomIfPresent." +
+                        " One of 'true', 'false', 'pass-through' is expected. Using 'true' by default.");
+                }
+            }
+            useBomIfPresent = useBomIfPresentConfig;
         }
 
         globalXsltFile = getServletConfig().getInitParameter("globalXsltFile");
@@ -1083,8 +1099,8 @@ public class DefaultServlet extends HttpServlet {
                             if (!renderResult.markSupported()) {
                                 renderResult = new BufferedInputStream(renderResult);
                             }
-                            Charset bomCharset = processBom(renderResult);
-                            if (bomCharset != null && useBomIfPresent) {
+                            Charset bomCharset = processBom(renderResult, isStripBOM());
+                            if (bomCharset != null && "true".equals(useBomIfPresent)) {
                                 inputEncoding = bomCharset.name();
                             }
                         }
@@ -1105,8 +1121,8 @@ public class DefaultServlet extends HttpServlet {
                             if (!source.markSupported()) {
                                 source = new BufferedInputStream(source);
                             }
-                            Charset bomCharset = processBom(source);
-                            if (bomCharset != null && useBomIfPresent) {
+                            Charset bomCharset = processBom(source, isStripBOM());
+                            if (bomCharset != null && "true".equals(useBomIfPresent)) {
                                 inputEncoding = bomCharset.name();
                             }
                             // Following test also ensures included resources
@@ -1213,11 +1229,20 @@ public class DefaultServlet extends HttpServlet {
         }
     }
 
+    /*
+     * useBomIfPresent can take 3 values (see init): true, false and pass-through
+     *
+     * When the latter is used, not only do we ignore the BOM and use the configured encoding,
+     * but we also leave the BOM in the output
+     */
+    private boolean isStripBOM() {
+        return !"pass-through".equals(useBomIfPresent);
+    }
 
     /*
      * Code borrowed heavily from Jasper's EncodingDetector
      */
-    private static Charset processBom(InputStream is) throws IOException {
+    private static Charset processBom(final InputStream is, final boolean stripBOM) throws IOException {
         // Java supported character sets do not use BOMs longer than 4 bytes
         byte[] bom = new byte[4];
         is.mark(bom.length);
@@ -1226,7 +1251,7 @@ public class DefaultServlet extends HttpServlet {
 
         // BOMs are at least 2 bytes
         if (count < 2) {
-            skip(is, 0);
+            skip(is, 0, stripBOM);
             return null;
         }
 
@@ -1234,31 +1259,31 @@ public class DefaultServlet extends HttpServlet {
         int b0 = bom[0] & 0xFF;
         int b1 = bom[1] & 0xFF;
         if (b0 == 0xFE && b1 == 0xFF) {
-            skip(is, 2);
+            skip(is, 2, stripBOM);
             return StandardCharsets.UTF_16BE;
         }
         // Delay the UTF_16LE check if there are more that 2 bytes since it
         // overlaps with UTF-32LE.
         if (count == 2 && b0 == 0xFF && b1 == 0xFE) {
-            skip(is, 2);
+            skip(is, 2, stripBOM);
             return StandardCharsets.UTF_16LE;
         }
 
         // Remaining BOMs are at least 3 bytes
         if (count < 3) {
-            skip(is, 0);
+            skip(is, 0, stripBOM);
             return null;
         }
 
         // UTF-8 is only 3-byte BOM
         int b2 = bom[2] & 0xFF;
         if (b0 == 0xEF && b1 == 0xBB && b2 == 0xBF) {
-            skip(is, 3);
+            skip(is, 3, stripBOM);
             return StandardCharsets.UTF_8;
         }
 
         if (count < 4) {
-            skip(is, 0);
+            skip(is, 0, stripBOM);
             return null;
         }
 
@@ -1275,18 +1300,18 @@ public class DefaultServlet extends HttpServlet {
         // won't see a UTF16-LE file with a BOM where the first real data is
         // 0x00 0x00
         if (b0 == 0xFF && b1 == 0xFE) {
-            skip(is, 2);
+            skip(is, 2, stripBOM);
             return StandardCharsets.UTF_16LE;
         }
 
-        skip(is, 0);
+        skip(is, 0, stripBOM);
         return null;
     }
 
 
-    private static void skip(InputStream is, int skip) throws IOException {
+    private static void skip(final InputStream is, int skip, final boolean stripBOM) throws IOException {
         is.reset();
-        while (skip-- > 0) {
+        while (stripBOM && skip-- > 0) {
             is.read();
         }
     }