You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@tomcat.apache.org by ma...@apache.org on 2023/06/26 09:44:15 UTC

[tomcat] 01/05: Add control of byte decoding errors to ByteChunk and StringCache

This is an automated email from the ASF dual-hosted git repository.

markt pushed a commit to branch 8.5.x
in repository https://gitbox.apache.org/repos/asf/tomcat.git

commit 04472f0d4ba3796f5e07ba98c38c82d9154d8e7f
Author: Mark Thomas <ma...@apache.org>
AuthorDate: Wed Jun 14 12:25:21 2023 +0100

    Add control of byte decoding errors to ByteChunk and StringCache
---
 java/org/apache/tomcat/util/buf/ByteChunk.java     |  49 +++++++++-
 java/org/apache/tomcat/util/buf/StringCache.java   |  66 +++++++++++---
 test/org/apache/jasper/compiler/TestGenerator.java |   3 +-
 .../apache/tomcat/util/buf/TestStringCache.java    | 100 +++++++++++++++++++++
 4 files changed, 204 insertions(+), 14 deletions(-)

diff --git a/java/org/apache/tomcat/util/buf/ByteChunk.java b/java/org/apache/tomcat/util/buf/ByteChunk.java
index a1a65cfdbc..ef02a4f86b 100644
--- a/java/org/apache/tomcat/util/buf/ByteChunk.java
+++ b/java/org/apache/tomcat/util/buf/ByteChunk.java
@@ -21,7 +21,9 @@ import java.io.ObjectInputStream;
 import java.io.ObjectOutputStream;
 import java.nio.ByteBuffer;
 import java.nio.CharBuffer;
+import java.nio.charset.CharacterCodingException;
 import java.nio.charset.Charset;
+import java.nio.charset.CodingErrorAction;
 import java.nio.charset.StandardCharsets;
 
 import org.apache.tomcat.util.res.StringManager;
@@ -526,23 +528,64 @@ public final class ByteChunk extends AbstractChunk {
 
     @Override
     public String toString() {
-        if (null == buff) {
+        try {
+            return toString(CodingErrorAction.REPLACE, CodingErrorAction.REPLACE);
+        } catch (CharacterCodingException e) {
+            // Unreachable code. Use of REPLACE above means the exception will never be thrown.
+            throw new IllegalStateException(e);
+        }
+    }
+
+
+    public String toString(CodingErrorAction malformedInputAction, CodingErrorAction unmappableCharacterAction)
+            throws CharacterCodingException {
+        if (isNull()) {
             return null;
         } else if (end - start == 0) {
             return "";
         }
-        return StringCache.toString(this);
+        return StringCache.toString(this, malformedInputAction, unmappableCharacterAction);
     }
 
 
+    /**
+     * Converts the current content of the byte buffer to a String using the configured character set.
+     *
+     * @return The result of converting the bytes to a String
+     *
+     * @deprecated Unused. This method will be removed in Tomcat 11 onwards.
+     */
+    @Deprecated
     public String toStringInternal() {
+        try {
+            return toStringInternal(CodingErrorAction.REPLACE, CodingErrorAction.REPLACE);
+        } catch (CharacterCodingException e) {
+            // Unreachable code. Use of REPLACE above means the exception will never be thrown.
+            throw new IllegalStateException(e);
+        }
+    }
+
+
+    /**
+     * Converts the current content of the byte buffer to a String using the configured character set.
+     *
+     * @param malformedInputAction      Action to take if the input is malformed
+     * @param unmappableCharacterAction Action to take if a byte sequence can't be mapped to a character
+     *
+     * @return The result of converting the bytes to a String
+     *
+     * @throws CharacterCodingException If an error occurs during the conversion
+     */
+    public String toStringInternal(CodingErrorAction malformedInputAction, CodingErrorAction unmappableCharacterAction)
+            throws CharacterCodingException {
         if (charset == null) {
             charset = DEFAULT_CHARSET;
         }
         // new String(byte[], int, int, Charset) takes a defensive copy of the
         // entire byte array. This is expensive if only a small subset of the
         // bytes will be used. The code below is from Apache Harmony.
-        CharBuffer cb = charset.decode(ByteBuffer.wrap(buff, start, end - start));
+        CharBuffer cb = charset.newDecoder().onMalformedInput(malformedInputAction)
+                .onUnmappableCharacter(unmappableCharacterAction).decode(ByteBuffer.wrap(buff, start, end - start));
         return new String(cb.array(), cb.arrayOffset(), cb.length());
     }
 
diff --git a/java/org/apache/tomcat/util/buf/StringCache.java b/java/org/apache/tomcat/util/buf/StringCache.java
index 3ba1e6eebf..929ec58607 100644
--- a/java/org/apache/tomcat/util/buf/StringCache.java
+++ b/java/org/apache/tomcat/util/buf/StringCache.java
@@ -16,10 +16,13 @@
  */
 package org.apache.tomcat.util.buf;
 
+import java.nio.charset.CharacterCodingException;
 import java.nio.charset.Charset;
+import java.nio.charset.CodingErrorAction;
 import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.Map.Entry;
+import java.util.Objects;
 import java.util.TreeMap;
 
 import org.apache.juli.logging.Log;
@@ -208,11 +211,22 @@ public class StringCache {
 
 
     public static String toString(ByteChunk bc) {
+        try {
+            return toString(bc, CodingErrorAction.REPLACE, CodingErrorAction.REPLACE);
+        } catch (CharacterCodingException e) {
+            // Unreachable code. Use of REPLACE above means the exception will never be thrown.
+            throw new IllegalStateException(e);
+        }
+    }
+
+
+    public static String toString(ByteChunk bc, CodingErrorAction malformedInputAction,
+            CodingErrorAction unmappableCharacterAction) throws CharacterCodingException {
 
         // If the cache is null, then either caching is disabled, or we're
         // still training
         if (bcCache == null) {
-            String value = bc.toStringInternal();
+            String value = bc.toStringInternal(malformedInputAction, unmappableCharacterAction);
             if (byteEnabled && (value.length() < maxStringSize)) {
                 // If training, everything is synced
                 synchronized (bcStats) {
@@ -291,6 +305,8 @@ public class StringCache {
                             System.arraycopy(bc.getBuffer(), start, entry.name, 0, end - start);
                             // Set encoding
                             entry.charset = bc.getCharset();
+                            entry.malformedInputAction = malformedInputAction;
+                            entry.unmappableCharacterAction = unmappableCharacterAction;
                             // Initialize occurrence count to one
                             count = new int[1];
                             count[0] = 1;
@@ -306,9 +322,9 @@ public class StringCache {
         } else {
             accessCount++;
             // Find the corresponding String
-            String result = find(bc);
+            String result = find(bc, malformedInputAction, unmappableCharacterAction);
             if (result == null) {
-                return bc.toStringInternal();
+                return bc.toStringInternal(malformedInputAction, unmappableCharacterAction);
             }
             // Note: We don't care about safety for the stats
             hitCount++;
@@ -473,10 +489,31 @@ public class StringCache {
      * @param name The name to find
      *
      * @return the corresponding value
+     *
+     * @deprecated Unused. Will be removed in Tomcat 11.
+     *             Use {@link #find(ByteChunk, CodingErrorAction, CodingErrorAction)}
      */
+    @Deprecated
     protected static final String find(ByteChunk name) {
+        return find(name, CodingErrorAction.REPLACE, CodingErrorAction.REPLACE);
+    }
+
+
+    /**
+     * Find an entry given its name in the cache and return the associated String.
+     *
+     * @param name                      The name to find
+     * @param malformedInputAction      Action to take if an malformed input is encountered
+     * @param unmappableCharacterAction Action to take if an unmappable character is encountered
+     *
+     * @return the corresponding value
+     */
+    protected static final String find(ByteChunk name, CodingErrorAction malformedInputAction,
+        CodingErrorAction unmappableCharacterAction) {
         int pos = findClosest(name, bcCache, bcCache.length);
-        if ((pos < 0) || (compare(name, bcCache[pos].name) != 0) || !(name.getCharset().equals(bcCache[pos].charset))) {
+        if ((pos < 0) || (compare(name, bcCache[pos].name) != 0) || !(name.getCharset().equals(bcCache[pos].charset)) ||
+                !malformedInputAction.equals(bcCache[pos].malformedInputAction) ||
+                !unmappableCharacterAction.equals(bcCache[pos].unmappableCharacterAction)) {
             return null;
         } else {
             return bcCache[pos].value;
@@ -642,11 +679,12 @@ public class StringCache {
 
     // -------------------------------------------------- ByteEntry Inner Class
 
-
     private static class ByteEntry {
 
         private byte[] name = null;
         private Charset charset = null;
+        private CodingErrorAction malformedInputAction = null;
+        private CodingErrorAction unmappableCharacterAction = null;
         private String value = null;
 
         @Override
@@ -656,17 +694,25 @@ public class StringCache {
 
         @Override
         public int hashCode() {
-            return value.hashCode();
+            return Objects.hash(malformedInputAction, unmappableCharacterAction, value);
         }
 
         @Override
         public boolean equals(Object obj) {
-            if (obj instanceof ByteEntry) {
-                return value.equals(((ByteEntry) obj).value);
+            if (this == obj) {
+                return true;
             }
-            return false;
+            if (obj == null) {
+                return false;
+            }
+            if (getClass() != obj.getClass()) {
+                return false;
+            }
+            ByteEntry other = (ByteEntry) obj;
+            return Objects.equals(malformedInputAction, other.malformedInputAction) &&
+                    Objects.equals(unmappableCharacterAction, other.unmappableCharacterAction) &&
+                    Objects.equals(value, other.value);
         }
-
     }
 
 
diff --git a/test/org/apache/jasper/compiler/TestGenerator.java b/test/org/apache/jasper/compiler/TestGenerator.java
index 4e3a07b250..f776af6ea8 100644
--- a/test/org/apache/jasper/compiler/TestGenerator.java
+++ b/test/org/apache/jasper/compiler/TestGenerator.java
@@ -21,6 +21,7 @@ import java.beans.Introspector;
 import java.beans.PropertyDescriptor;
 import java.beans.PropertyEditorSupport;
 import java.io.IOException;
+import java.nio.charset.CodingErrorAction;
 import java.util.Date;
 
 import javax.servlet.http.HttpServletResponse;
@@ -205,7 +206,7 @@ public class TestGenerator extends TomcatBaseTest {
         ByteChunk bc = new ByteChunk();
         int rc = getUrl("http://localhost:" + getPort() + "/test/bug5nnnn/bug56529.jsp", bc, null);
         Assert.assertEquals(HttpServletResponse.SC_OK, rc);
-        String response = bc.toStringInternal();
+        String response = bc.toStringInternal(CodingErrorAction.REPORT, CodingErrorAction.REPORT);
         Assert.assertTrue(response, response.contains("[1:attribute1: '', attribute2: '']"));
         Assert.assertTrue(response, response.contains("[2:attribute1: '', attribute2: '']"));
     }
diff --git a/test/org/apache/tomcat/util/buf/TestStringCache.java b/test/org/apache/tomcat/util/buf/TestStringCache.java
new file mode 100644
index 0000000000..2345d2de76
--- /dev/null
+++ b/test/org/apache/tomcat/util/buf/TestStringCache.java
@@ -0,0 +1,100 @@
+/*
+ *  Licensed to the Apache Software Foundation (ASF) under one or more
+ *  contributor license agreements.  See the NOTICE file distributed with
+ *  this work for additional information regarding copyright ownership.
+ *  The ASF licenses this file to You under the Apache License, Version 2.0
+ *  (the "License"); you may not use this file except in compliance with
+ *  the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+package org.apache.tomcat.util.buf;
+
+import java.nio.charset.CharacterCodingException;
+import java.nio.charset.CodingErrorAction;
+import java.nio.charset.StandardCharsets;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+public class TestStringCache {
+
+    private static final byte[] BYTES_VALID = new byte[] { 65, 66, 67, 68};
+    private static final byte[] BYTES_INVALID = new byte[] {65, 66, -1, 67, 68};
+
+    private static final ByteChunk INPUT_VALID = new ByteChunk();
+    private static final ByteChunk INPUT_INVALID = new ByteChunk();
+
+    private static final CodingErrorAction[] actions =
+            new CodingErrorAction[] { CodingErrorAction.IGNORE, CodingErrorAction.REPLACE, CodingErrorAction.REPORT };
+
+    static {
+        INPUT_VALID.setBytes(BYTES_VALID, 0, BYTES_VALID.length);
+        INPUT_VALID.setCharset(StandardCharsets.UTF_8);
+        INPUT_INVALID.setBytes(BYTES_INVALID, 0, BYTES_INVALID.length);
+        INPUT_INVALID.setCharset(StandardCharsets.UTF_8);
+    }
+
+
+    @Test
+    public void testCodingErrorLookup() {
+
+        System.setProperty("tomcat.util.buf.StringCache.byte.enabled", "true");
+
+        Assert.assertTrue(StringCache.byteEnabled);
+        StringCache sc = new StringCache();
+        sc.reset();
+
+        for (int i = 0; i < StringCache.trainThreshold * 2; i++) {
+            for (CodingErrorAction malformedInputAction : actions) {
+                try {
+                    // UTF-8 doesn't have any unmappable characters
+                    INPUT_VALID.toString(malformedInputAction, CodingErrorAction.IGNORE);
+                    INPUT_INVALID.toString(malformedInputAction, CodingErrorAction.IGNORE);
+                } catch (CharacterCodingException e) {
+                    // Ignore
+                }
+            }
+        }
+
+        Assert.assertNotNull(StringCache.bcCache);
+
+        // Check the valid input is cached correctly
+        for (CodingErrorAction malformedInputAction : actions) {
+            try {
+                Assert.assertEquals("ABCD", INPUT_VALID.toString(malformedInputAction, CodingErrorAction.IGNORE));
+            } catch (CharacterCodingException e) {
+                // Should not happen for valid input
+                Assert.fail();
+            }
+        }
+
+        // Check the valid input is cached correctly
+        try {
+            Assert.assertEquals("ABCD", INPUT_INVALID.toString(CodingErrorAction.IGNORE, CodingErrorAction.IGNORE));
+        } catch (CharacterCodingException e) {
+            // Should not happen for invalid input with IGNORE
+            Assert.fail();
+        }
+        try {
+            Assert.assertEquals("AB\ufffdCD", INPUT_INVALID.toString(CodingErrorAction.REPLACE, CodingErrorAction.IGNORE));
+        } catch (CharacterCodingException e) {
+            // Should not happen for invalid input with REPLACE
+            Assert.fail();
+        }
+        try {
+            Assert.assertEquals("ABCD", INPUT_INVALID.toString(CodingErrorAction.REPORT, CodingErrorAction.IGNORE));
+            // Should throw exception
+            Assert.fail();
+        } catch (CharacterCodingException e) {
+            // Ignore
+        }
+
+    }
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscribe@tomcat.apache.org
For additional commands, e-mail: dev-help@tomcat.apache.org