You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@tomcat.apache.org by ma...@apache.org on 2023/01/18 16:05:59 UTC

[tomcat] branch main updated: Refactor storage of current character encoding for request/response

This is an automated email from the ASF dual-hosted git repository.

markt pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tomcat.git


The following commit(s) were added to refs/heads/main by this push:
     new 021b1696b3 Refactor storage of current character encoding for request/response
021b1696b3 is described below

commit 021b1696b3d71f9feed77c048fe7a8e7cf243ae8
Author: Mark Thomas <ma...@apache.org>
AuthorDate: Wed Jan 18 16:05:50 2023 +0000

    Refactor storage of current character encoding for request/response
---
 java/org/apache/coyote/Request.java                |  27 ++---
 java/org/apache/coyote/Response.java               |  33 +++---
 java/org/apache/tomcat/util/buf/CharsetHolder.java | 113 +++++++++++++++++++++
 3 files changed, 135 insertions(+), 38 deletions(-)

diff --git a/java/org/apache/coyote/Request.java b/java/org/apache/coyote/Request.java
index 6908cdb7f0..8fdcfdb6ed 100644
--- a/java/org/apache/coyote/Request.java
+++ b/java/org/apache/coyote/Request.java
@@ -29,7 +29,7 @@ import java.util.concurrent.atomic.AtomicReference;
 import jakarta.servlet.ReadListener;
 import jakarta.servlet.ServletConnection;
 
-import org.apache.tomcat.util.buf.B2CConverter;
+import org.apache.tomcat.util.buf.CharsetHolder;
 import org.apache.tomcat.util.buf.MessageBytes;
 import org.apache.tomcat.util.buf.UDecoder;
 import org.apache.tomcat.util.http.MimeHeaders;
@@ -148,10 +148,7 @@ public final class Request {
      */
     private long contentLength = -1;
     private MessageBytes contentTypeMB = null;
-    private Charset charset = null;
-    // Retain the original, user specified character encoding so it can be
-    // returned even if it is invalid
-    private String characterEncoding = null;
+    private CharsetHolder charsetHolder = CharsetHolder.EMPTY;
 
     /**
      * Is there an expectation ?
@@ -409,11 +406,11 @@ public final class Request {
      *         content type.
      */
     public String getCharacterEncoding() {
-        if (characterEncoding == null) {
-            characterEncoding = getCharsetFromContentType(getContentType());
+        if (charsetHolder.getName() == null) {
+            charsetHolder = CharsetHolder.getInstance(getCharsetFromContentType(getContentType()));
         }
 
-        return characterEncoding;
+        return charsetHolder.getName();
     }
 
 
@@ -428,20 +425,17 @@ public final class Request {
      *         invalid character encoding
      */
     public Charset getCharset() throws UnsupportedEncodingException {
-        if (charset == null) {
+        if (charsetHolder.getName() == null) {
+            // Populates charsetHolder
             getCharacterEncoding();
-            if (characterEncoding != null) {
-                charset = B2CConverter.getCharset(characterEncoding);
-            }
          }
 
-        return charset;
+        return charsetHolder.getValidatedCharset();
     }
 
 
     public void setCharset(Charset charset) {
-        this.charset = charset;
-        this.characterEncoding = charset.name();
+        charsetHolder = CharsetHolder.getInstance(charset);
     }
 
 
@@ -784,8 +778,7 @@ public final class Request {
 
         contentLength = -1;
         contentTypeMB = null;
-        charset = null;
-        characterEncoding = null;
+        charsetHolder = CharsetHolder.EMPTY;
         expectation = false;
         headers.recycle();
         trailerFields.clear();
diff --git a/java/org/apache/coyote/Response.java b/java/org/apache/coyote/Response.java
index b2a32387d7..f5fb215c22 100644
--- a/java/org/apache/coyote/Response.java
+++ b/java/org/apache/coyote/Response.java
@@ -32,7 +32,7 @@ import jakarta.servlet.WriteListener;
 
 import org.apache.juli.logging.Log;
 import org.apache.juli.logging.LogFactory;
-import org.apache.tomcat.util.buf.B2CConverter;
+import org.apache.tomcat.util.buf.CharsetHolder;
 import org.apache.tomcat.util.buf.MessageBytes;
 import org.apache.tomcat.util.http.MimeHeaders;
 import org.apache.tomcat.util.http.parser.MediaType;
@@ -113,11 +113,7 @@ public final class Response {
      */
     String contentType = null;
     String contentLanguage = null;
-    Charset charset = null;
-    // Retain the original name used to set the charset so exactly that name is
-    // used in the ContentType header. Some (arguably non-specification
-    // compliant) user agents are very particular
-    String characterEncoding = null;
+    private CharsetHolder charsetHolder = CharsetHolder.EMPTY;
     long contentLength = -1;
     private Locale locale = DEFAULT_LOCALE;
 
@@ -505,19 +501,13 @@ public final class Response {
         if (isCommitted()) {
             return;
         }
-        if (characterEncoding == null) {
-            this.charset = null;
-            this.characterEncoding = null;
-            return;
-        }
 
-        this.characterEncoding = characterEncoding;
-        this.charset = B2CConverter.getCharset(characterEncoding);
+        charsetHolder = CharsetHolder.getValidatedInstance(characterEncoding);
     }
 
 
     public Charset getCharset() {
-        return charset;
+        return charsetHolder.getCharset();
     }
 
 
@@ -525,7 +515,7 @@ public final class Response {
      * @return The name of the current encoding
      */
     public String getCharacterEncoding() {
-        return characterEncoding;
+        return charsetHolder.getName();
     }
 
 
@@ -573,7 +563,7 @@ public final class Response {
             charsetValue = charsetValue.trim();
             if (charsetValue.length() > 0) {
                 try {
-                    charset = B2CConverter.getCharset(charsetValue);
+                    charsetHolder = CharsetHolder.getValidatedInstance(charsetValue);
                 } catch (UnsupportedEncodingException e) {
                     log.warn(sm.getString("response.encoding.invalid", charsetValue), e);
                 }
@@ -588,9 +578,11 @@ public final class Response {
     public String getContentType() {
 
         String ret = contentType;
-
-        if (ret != null && charset != null) {
-            ret = ret + ";charset=" + characterEncoding;
+        if (ret != null) {
+            String charsetName = charsetHolder.getName();
+            if (charsetName != null) {
+                ret = ret + ";charset=" + charsetName;
+            }
         }
 
         return ret;
@@ -634,8 +626,7 @@ public final class Response {
         contentType = null;
         contentLanguage = null;
         locale = DEFAULT_LOCALE;
-        charset = null;
-        characterEncoding = null;
+        charsetHolder = CharsetHolder.EMPTY;
         contentLength = -1;
         status = 200;
         message = null;
diff --git a/java/org/apache/tomcat/util/buf/CharsetHolder.java b/java/org/apache/tomcat/util/buf/CharsetHolder.java
new file mode 100755
index 0000000000..da0f912565
--- /dev/null
+++ b/java/org/apache/tomcat/util/buf/CharsetHolder.java
@@ -0,0 +1,113 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tomcat.util.buf;
+
+import java.io.UnsupportedEncodingException;
+import java.nio.charset.Charset;
+
+/**
+ * Represents a character encoding to be used for a request or response.
+ *
+ * <p>Historically the Servlet API has used Strings for this information with
+ * lazy conversion to Charset. Sometimes the API required that an invalid name
+ * triggered an exception. Sometimes the invalid name was treated as if it had
+ * never been set. This resulted in classes storing both the String
+ * and, if the name was valid, the Charset with validation and conversion logic
+ * spread throughout those classes.
+ *
+ * <p>This class is an attempt to encapsulate that behvaiour.
+ */
+public class CharsetHolder {
+
+    public static CharsetHolder EMPTY = new CharsetHolder(null, null);
+
+    public static CharsetHolder getInstance(String name) {
+        if (name == null) {
+            return EMPTY;
+        }
+
+        Charset charset;
+        try {
+            charset = B2CConverter.getCharset(name);
+        } catch (UnsupportedEncodingException e) {
+            charset = null;
+        }
+
+        return new CharsetHolder(name, charset);
+    }
+
+
+    public static CharsetHolder getValidatedInstance(String name) throws UnsupportedEncodingException {
+        if (name == null) {
+            return EMPTY;
+        }
+
+        return new CharsetHolder(name, B2CConverter.getCharset(name));
+    }
+
+
+    public static CharsetHolder getInstance(Charset encoding) {
+        if (encoding == null) {
+            return EMPTY;
+        }
+
+        return new CharsetHolder(encoding.name(), encoding);
+    }
+
+
+    private final String name;
+    private final Charset charset;
+
+
+    private CharsetHolder(String name, Charset charset) {
+        this.name = name;
+        this.charset = charset;
+    }
+
+
+    public String getName() {
+        return name;
+    }
+
+
+    /**
+     * Returns the Charset, {@code null} if no Charset has been specified, or
+     * {@code null} if the holder was created using the name of a Charset that
+     * the JRE does not recognise.
+     *
+     * @return The Charset or {@code null} it is not set or invalid
+     */
+    public Charset getCharset() {
+        return charset;
+    }
+
+
+    /**
+     * Returns the Charset or {@code null} if no Charset has been specified.
+     *
+     * @return The validated Charset or {@code null} if is not set
+     *
+     * @throws UnsupportedEncodingException if the holder was created using the
+     *     name of a Charset that the JRE does not recognise
+     */
+    public Charset getValidatedCharset() throws UnsupportedEncodingException {
+        if (name != null && charset == null) {
+            throw new UnsupportedEncodingException(name);
+        }
+        return charset;
+    }
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscribe@tomcat.apache.org
For additional commands, e-mail: dev-help@tomcat.apache.org