You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@tomcat.apache.org by ma...@apache.org on 2023/06/26 09:28:29 UTC

[tomcat] branch 9.0.x updated (9bfc23fb97 -> 4b15583ee5)

This is an automated email from the ASF dual-hosted git repository.

markt pushed a change to branch 9.0.x
in repository https://gitbox.apache.org/repos/asf/tomcat.git


    from 9bfc23fb97 Simplify reading of request body for x-www-form-urlencoded processing
     new 2ce6cdb5ea Add control of byte decoding errors to ByteChunk and StringCache
     new 4b15583ee5 Use the previous decode call in the old default for now

The 2 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 java/org/apache/tomcat/util/buf/ByteChunk.java     |  52 ++++++++++-
 java/org/apache/tomcat/util/buf/StringCache.java   |  66 +++++++++++---
 test/org/apache/jasper/compiler/TestGenerator.java |   3 +-
 .../apache/tomcat/util/buf/TestStringCache.java    | 100 +++++++++++++++++++++
 4 files changed, 208 insertions(+), 13 deletions(-)
 create mode 100644 test/org/apache/tomcat/util/buf/TestStringCache.java


---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscribe@tomcat.apache.org
For additional commands, e-mail: dev-help@tomcat.apache.org


[tomcat] 02/02: Use the previous decode call in the old default for now

Posted by ma...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

markt pushed a commit to branch 9.0.x
in repository https://gitbox.apache.org/repos/asf/tomcat.git

commit 4b15583ee5781dd89b5181799b66949d2f27e514
Author: remm <re...@apache.org>
AuthorDate: Fri Jun 23 09:43:40 2023 +0200

    Use the previous decode call in the old default for now
    
    Just in case it makes a performance difference (the JVM code is very
    different).
---
 java/org/apache/tomcat/util/buf/ByteChunk.java | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/java/org/apache/tomcat/util/buf/ByteChunk.java b/java/org/apache/tomcat/util/buf/ByteChunk.java
index 5872f41fc1..1002d2fe49 100644
--- a/java/org/apache/tomcat/util/buf/ByteChunk.java
+++ b/java/org/apache/tomcat/util/buf/ByteChunk.java
@@ -622,8 +622,13 @@ public final class ByteChunk extends AbstractChunk {
         // new String(byte[], int, int, Charset) takes a defensive copy of the
         // entire byte array. This is expensive if only a small subset of the
         // bytes will be used. The code below is from Apache Harmony.
-        CharBuffer cb = charset.newDecoder().onMalformedInput(malformedInputAction)
-                .onUnmappableCharacter(unmappableCharacterAction).decode(ByteBuffer.wrap(buff, start, end - start));
+        CharBuffer cb;
+        if (malformedInputAction == CodingErrorAction.REPLACE && unmappableCharacterAction == CodingErrorAction.REPLACE) {
+            cb = charset.decode(ByteBuffer.wrap(buff, start, end - start));
+        } else {
+            cb = charset.newDecoder().onMalformedInput(malformedInputAction)
+                    .onUnmappableCharacter(unmappableCharacterAction).decode(ByteBuffer.wrap(buff, start, end - start));
+        }
         return new String(cb.array(), cb.arrayOffset(), cb.length());
     }
 


---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscribe@tomcat.apache.org
For additional commands, e-mail: dev-help@tomcat.apache.org


[tomcat] 01/02: Add control of byte decoding errors to ByteChunk and StringCache

Posted by ma...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

markt pushed a commit to branch 9.0.x
in repository https://gitbox.apache.org/repos/asf/tomcat.git

commit 2ce6cdb5ea2de74832dea6ebd96f241788c8bb19
Author: Mark Thomas <ma...@apache.org>
AuthorDate: Wed Jun 14 12:25:21 2023 +0100

    Add control of byte decoding errors to ByteChunk and StringCache
---
 java/org/apache/tomcat/util/buf/ByteChunk.java     |  47 +++++++++-
 java/org/apache/tomcat/util/buf/StringCache.java   |  66 +++++++++++---
 test/org/apache/jasper/compiler/TestGenerator.java |   3 +-
 .../apache/tomcat/util/buf/TestStringCache.java    | 100 +++++++++++++++++++++
 4 files changed, 203 insertions(+), 13 deletions(-)

diff --git a/java/org/apache/tomcat/util/buf/ByteChunk.java b/java/org/apache/tomcat/util/buf/ByteChunk.java
index 621f955af5..5872f41fc1 100644
--- a/java/org/apache/tomcat/util/buf/ByteChunk.java
+++ b/java/org/apache/tomcat/util/buf/ByteChunk.java
@@ -21,7 +21,9 @@ import java.io.ObjectInputStream;
 import java.io.ObjectOutputStream;
 import java.nio.ByteBuffer;
 import java.nio.CharBuffer;
+import java.nio.charset.CharacterCodingException;
 import java.nio.charset.Charset;
+import java.nio.charset.CodingErrorAction;
 import java.nio.charset.StandardCharsets;
 
 /*
@@ -564,23 +566,64 @@ public final class ByteChunk extends AbstractChunk {
 
     @Override
     public String toString() {
+        try {
+            return toString(CodingErrorAction.REPLACE, CodingErrorAction.REPLACE);
+        } catch (CharacterCodingException e) {
+            // Unreachable code. Use of REPLACE above means the exception will never be thrown.
+            throw new IllegalStateException(e);
+        }
+    }
+
+
+    public String toString(CodingErrorAction malformedInputAction, CodingErrorAction unmappableCharacterAction)
+            throws CharacterCodingException {
         if (isNull()) {
             return null;
         } else if (end - start == 0) {
             return "";
         }
-        return StringCache.toString(this);
+        return StringCache.toString(this, malformedInputAction, unmappableCharacterAction);
     }
 
 
+    /**
+     * Converts the current content of the byte buffer to a String using the configured character set.
+     *
+     * @return The result of converting the bytes to a String
+     *
+     * @deprecated Unused. This method will be removed in Tomcat 11 onwards.
+     */
+    @Deprecated
     public String toStringInternal() {
+        try {
+            return toStringInternal(CodingErrorAction.REPLACE, CodingErrorAction.REPLACE);
+        } catch (CharacterCodingException e) {
+            // Unreachable code. Use of REPLACE above means the exception will never be thrown.
+            throw new IllegalStateException(e);
+        }
+    }
+
+
+    /**
+     * Converts the current content of the byte buffer to a String using the configured character set.
+     *
+     * @param malformedInputAction      Action to take if the input is malformed
+     * @param unmappableCharacterAction Action to take if a byte sequence can't be mapped to a character
+     *
+     * @return The result of converting the bytes to a String
+     *
+     * @throws CharacterCodingException If an error occurs during the conversion
+     */
+    public String toStringInternal(CodingErrorAction malformedInputAction, CodingErrorAction unmappableCharacterAction)
+            throws CharacterCodingException {
         if (charset == null) {
             charset = DEFAULT_CHARSET;
         }
         // new String(byte[], int, int, Charset) takes a defensive copy of the
         // entire byte array. This is expensive if only a small subset of the
         // bytes will be used. The code below is from Apache Harmony.
-        CharBuffer cb = charset.decode(ByteBuffer.wrap(buff, start, end - start));
+        CharBuffer cb = charset.newDecoder().onMalformedInput(malformedInputAction)
+                .onUnmappableCharacter(unmappableCharacterAction).decode(ByteBuffer.wrap(buff, start, end - start));
         return new String(cb.array(), cb.arrayOffset(), cb.length());
     }
 
diff --git a/java/org/apache/tomcat/util/buf/StringCache.java b/java/org/apache/tomcat/util/buf/StringCache.java
index d39de93cfa..5b82e44f74 100644
--- a/java/org/apache/tomcat/util/buf/StringCache.java
+++ b/java/org/apache/tomcat/util/buf/StringCache.java
@@ -16,10 +16,13 @@
  */
 package org.apache.tomcat.util.buf;
 
+import java.nio.charset.CharacterCodingException;
 import java.nio.charset.Charset;
+import java.nio.charset.CodingErrorAction;
 import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.Map.Entry;
+import java.util.Objects;
 import java.util.TreeMap;
 
 import org.apache.juli.logging.Log;
@@ -208,11 +211,22 @@ public class StringCache {
 
 
     public static String toString(ByteChunk bc) {
+        try {
+            return toString(bc, CodingErrorAction.REPLACE, CodingErrorAction.REPLACE);
+        } catch (CharacterCodingException e) {
+            // Unreachable code. Use of REPLACE above means the exception will never be thrown.
+            throw new IllegalStateException(e);
+        }
+    }
+
+
+    public static String toString(ByteChunk bc, CodingErrorAction malformedInputAction,
+            CodingErrorAction unmappableCharacterAction) throws CharacterCodingException {
 
         // If the cache is null, then either caching is disabled, or we're
         // still training
         if (bcCache == null) {
-            String value = bc.toStringInternal();
+            String value = bc.toStringInternal(malformedInputAction, unmappableCharacterAction);
             if (byteEnabled && (value.length() < maxStringSize)) {
                 // If training, everything is synced
                 synchronized (bcStats) {
@@ -285,6 +299,8 @@ public class StringCache {
                             System.arraycopy(bc.getBuffer(), start, entry.name, 0, end - start);
                             // Set encoding
                             entry.charset = bc.getCharset();
+                            entry.malformedInputAction = malformedInputAction;
+                            entry.unmappableCharacterAction = unmappableCharacterAction;
                             // Initialize occurrence count to one
                             count = new int[1];
                             count[0] = 1;
@@ -300,9 +316,9 @@ public class StringCache {
         } else {
             accessCount++;
             // Find the corresponding String
-            String result = find(bc);
+            String result = find(bc, malformedInputAction, unmappableCharacterAction);
             if (result == null) {
-                return bc.toStringInternal();
+                return bc.toStringInternal(malformedInputAction, unmappableCharacterAction);
             }
             // Note: We don't care about safety for the stats
             hitCount++;
@@ -462,10 +478,31 @@ public class StringCache {
      * @param name The name to find
      *
      * @return the corresponding value
+     *
+     * @deprecated Unused. Will be removed in Tomcat 11.
+     *             Use {@link #find(ByteChunk, CodingErrorAction, CodingErrorAction)}
      */
+    @Deprecated
     protected static final String find(ByteChunk name) {
+        return find(name, CodingErrorAction.REPLACE, CodingErrorAction.REPLACE);
+    }
+
+
+    /**
+     * Find an entry given its name in the cache and return the associated String.
+     *
+     * @param name                      The name to find
+     * @param malformedInputAction      Action to take if an malformed input is encountered
+     * @param unmappableCharacterAction Action to take if an unmappable character is encountered
+     *
+     * @return the corresponding value
+     */
+    protected static final String find(ByteChunk name, CodingErrorAction malformedInputAction,
+        CodingErrorAction unmappableCharacterAction) {
         int pos = findClosest(name, bcCache, bcCache.length);
-        if ((pos < 0) || (compare(name, bcCache[pos].name) != 0) || !(name.getCharset().equals(bcCache[pos].charset))) {
+        if ((pos < 0) || (compare(name, bcCache[pos].name) != 0) || !(name.getCharset().equals(bcCache[pos].charset)) ||
+                !malformedInputAction.equals(bcCache[pos].malformedInputAction) ||
+                !unmappableCharacterAction.equals(bcCache[pos].unmappableCharacterAction)) {
             return null;
         } else {
             return bcCache[pos].value;
@@ -631,11 +668,12 @@ public class StringCache {
 
     // -------------------------------------------------- ByteEntry Inner Class
 
-
     private static class ByteEntry {
 
         private byte[] name = null;
         private Charset charset = null;
+        private CodingErrorAction malformedInputAction = null;
+        private CodingErrorAction unmappableCharacterAction = null;
         private String value = null;
 
         @Override
@@ -645,17 +683,25 @@ public class StringCache {
 
         @Override
         public int hashCode() {
-            return value.hashCode();
+            return Objects.hash(malformedInputAction, unmappableCharacterAction, value);
         }
 
         @Override
         public boolean equals(Object obj) {
-            if (obj instanceof ByteEntry) {
-                return value.equals(((ByteEntry) obj).value);
+            if (this == obj) {
+                return true;
             }
-            return false;
+            if (obj == null) {
+                return false;
+            }
+            if (getClass() != obj.getClass()) {
+                return false;
+            }
+            ByteEntry other = (ByteEntry) obj;
+            return Objects.equals(malformedInputAction, other.malformedInputAction) &&
+                    Objects.equals(unmappableCharacterAction, other.unmappableCharacterAction) &&
+                    Objects.equals(value, other.value);
         }
-
     }
 
 
diff --git a/test/org/apache/jasper/compiler/TestGenerator.java b/test/org/apache/jasper/compiler/TestGenerator.java
index 98579807fe..e7f0cdf254 100644
--- a/test/org/apache/jasper/compiler/TestGenerator.java
+++ b/test/org/apache/jasper/compiler/TestGenerator.java
@@ -22,6 +22,7 @@ import java.beans.PropertyDescriptor;
 import java.beans.PropertyEditorSupport;
 import java.io.File;
 import java.io.IOException;
+import java.nio.charset.CodingErrorAction;
 import java.util.Date;
 import java.util.Scanner;
 
@@ -211,7 +212,7 @@ public class TestGenerator extends TomcatBaseTest {
         ByteChunk bc = new ByteChunk();
         int rc = getUrl("http://localhost:" + getPort() + "/test/bug5nnnn/bug56529.jsp", bc, null);
         Assert.assertEquals(HttpServletResponse.SC_OK, rc);
-        String response = bc.toStringInternal();
+        String response = bc.toStringInternal(CodingErrorAction.REPORT, CodingErrorAction.REPORT);
         Assert.assertTrue(response, response.contains("[1:attribute1: '', attribute2: '']"));
         Assert.assertTrue(response, response.contains("[2:attribute1: '', attribute2: '']"));
     }
diff --git a/test/org/apache/tomcat/util/buf/TestStringCache.java b/test/org/apache/tomcat/util/buf/TestStringCache.java
new file mode 100644
index 0000000000..2345d2de76
--- /dev/null
+++ b/test/org/apache/tomcat/util/buf/TestStringCache.java
@@ -0,0 +1,100 @@
+/*
+ *  Licensed to the Apache Software Foundation (ASF) under one or more
+ *  contributor license agreements.  See the NOTICE file distributed with
+ *  this work for additional information regarding copyright ownership.
+ *  The ASF licenses this file to You under the Apache License, Version 2.0
+ *  (the "License"); you may not use this file except in compliance with
+ *  the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+package org.apache.tomcat.util.buf;
+
+import java.nio.charset.CharacterCodingException;
+import java.nio.charset.CodingErrorAction;
+import java.nio.charset.StandardCharsets;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+public class TestStringCache {
+
+    private static final byte[] BYTES_VALID = new byte[] { 65, 66, 67, 68};
+    private static final byte[] BYTES_INVALID = new byte[] {65, 66, -1, 67, 68};
+
+    private static final ByteChunk INPUT_VALID = new ByteChunk();
+    private static final ByteChunk INPUT_INVALID = new ByteChunk();
+
+    private static final CodingErrorAction[] actions =
+            new CodingErrorAction[] { CodingErrorAction.IGNORE, CodingErrorAction.REPLACE, CodingErrorAction.REPORT };
+
+    static {
+        INPUT_VALID.setBytes(BYTES_VALID, 0, BYTES_VALID.length);
+        INPUT_VALID.setCharset(StandardCharsets.UTF_8);
+        INPUT_INVALID.setBytes(BYTES_INVALID, 0, BYTES_INVALID.length);
+        INPUT_INVALID.setCharset(StandardCharsets.UTF_8);
+    }
+
+
+    @Test
+    public void testCodingErrorLookup() {
+
+        System.setProperty("tomcat.util.buf.StringCache.byte.enabled", "true");
+
+        Assert.assertTrue(StringCache.byteEnabled);
+        StringCache sc = new StringCache();
+        sc.reset();
+
+        for (int i = 0; i < StringCache.trainThreshold * 2; i++) {
+            for (CodingErrorAction malformedInputAction : actions) {
+                try {
+                    // UTF-8 doesn't have any unmappable characters
+                    INPUT_VALID.toString(malformedInputAction, CodingErrorAction.IGNORE);
+                    INPUT_INVALID.toString(malformedInputAction, CodingErrorAction.IGNORE);
+                } catch (CharacterCodingException e) {
+                    // Ignore
+                }
+            }
+        }
+
+        Assert.assertNotNull(StringCache.bcCache);
+
+        // Check the valid input is cached correctly
+        for (CodingErrorAction malformedInputAction : actions) {
+            try {
+                Assert.assertEquals("ABCD", INPUT_VALID.toString(malformedInputAction, CodingErrorAction.IGNORE));
+            } catch (CharacterCodingException e) {
+                // Should not happen for valid input
+                Assert.fail();
+            }
+        }
+
+        // Check the valid input is cached correctly
+        try {
+            Assert.assertEquals("ABCD", INPUT_INVALID.toString(CodingErrorAction.IGNORE, CodingErrorAction.IGNORE));
+        } catch (CharacterCodingException e) {
+            // Should not happen for invalid input with IGNORE
+            Assert.fail();
+        }
+        try {
+            Assert.assertEquals("AB\ufffdCD", INPUT_INVALID.toString(CodingErrorAction.REPLACE, CodingErrorAction.IGNORE));
+        } catch (CharacterCodingException e) {
+            // Should not happen for invalid input with REPLACE
+            Assert.fail();
+        }
+        try {
+            Assert.assertEquals("ABCD", INPUT_INVALID.toString(CodingErrorAction.REPORT, CodingErrorAction.IGNORE));
+            // Should throw exception
+            Assert.fail();
+        } catch (CharacterCodingException e) {
+            // Ignore
+        }
+
+    }
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscribe@tomcat.apache.org
For additional commands, e-mail: dev-help@tomcat.apache.org