You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@tomcat.apache.org by ma...@apache.org on 2023/06/26 09:28:06 UTC
[tomcat] 01/02: Add control of byte decoding errors to ByteChunk and StringCache
This is an automated email from the ASF dual-hosted git repository.
markt pushed a commit to branch 10.1.x
in repository https://gitbox.apache.org/repos/asf/tomcat.git
commit 4aebe196cd02745689ced8d66c70f77c1a52af39
Author: Mark Thomas <ma...@apache.org>
AuthorDate: Wed Jun 14 12:25:21 2023 +0100
Add control of byte decoding errors to ByteChunk and StringCache
---
java/org/apache/tomcat/util/buf/ByteChunk.java | 47 +++++++++-
java/org/apache/tomcat/util/buf/StringCache.java | 66 +++++++++++---
test/org/apache/jasper/compiler/TestGenerator.java | 3 +-
.../apache/tomcat/util/buf/TestStringCache.java | 100 +++++++++++++++++++++
4 files changed, 203 insertions(+), 13 deletions(-)
diff --git a/java/org/apache/tomcat/util/buf/ByteChunk.java b/java/org/apache/tomcat/util/buf/ByteChunk.java
index 101f9c0eaa..ff00f55774 100644
--- a/java/org/apache/tomcat/util/buf/ByteChunk.java
+++ b/java/org/apache/tomcat/util/buf/ByteChunk.java
@@ -21,7 +21,9 @@ import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
+import java.nio.charset.CharacterCodingException;
import java.nio.charset.Charset;
+import java.nio.charset.CodingErrorAction;
import java.nio.charset.StandardCharsets;
/*
@@ -521,23 +523,64 @@ public final class ByteChunk extends AbstractChunk {
@Override
public String toString() {
+ try {
+ return toString(CodingErrorAction.REPLACE, CodingErrorAction.REPLACE);
+ } catch (CharacterCodingException e) {
+ // Unreachable code. Use of REPLACE above means the exception will never be thrown.
+ throw new IllegalStateException(e);
+ }
+ }
+
+
+ public String toString(CodingErrorAction malformedInputAction, CodingErrorAction unmappableCharacterAction)
+ throws CharacterCodingException {
if (isNull()) {
return null;
} else if (end - start == 0) {
return "";
}
- return StringCache.toString(this);
+ return StringCache.toString(this, malformedInputAction, unmappableCharacterAction);
}
+ /**
+ * Converts the current content of the byte buffer to a String using the configured character set.
+ *
+ * @return The result of converting the bytes to a String
+ *
+ * @deprecated Unused. This method will be removed in Tomcat 11 onwards.
+ */
+ @Deprecated
public String toStringInternal() {
+ try {
+ return toStringInternal(CodingErrorAction.REPLACE, CodingErrorAction.REPLACE);
+ } catch (CharacterCodingException e) {
+ // Unreachable code. Use of REPLACE above means the exception will never be thrown.
+ throw new IllegalStateException(e);
+ }
+ }
+
+
+ /**
+ * Converts the current content of the byte buffer to a String using the configured character set.
+ *
+ * @param malformedInputAction Action to take if the input is malformed
+ * @param unmappableCharacterAction Action to take if a byte sequence can't be mapped to a character
+ *
+ * @return The result of converting the bytes to a String
+ *
+ * @throws CharacterCodingException If an error occurs during the conversion
+ */
+ public String toStringInternal(CodingErrorAction malformedInputAction, CodingErrorAction unmappableCharacterAction)
+ throws CharacterCodingException {
if (charset == null) {
charset = DEFAULT_CHARSET;
}
// new String(byte[], int, int, Charset) takes a defensive copy of the
// entire byte array. This is expensive if only a small subset of the
// bytes will be used. The code below is from Apache Harmony.
- CharBuffer cb = charset.decode(ByteBuffer.wrap(buff, start, end - start));
+ CharBuffer cb = charset.newDecoder().onMalformedInput(malformedInputAction)
+ .onUnmappableCharacter(unmappableCharacterAction).decode(ByteBuffer.wrap(buff, start, end - start));
return new String(cb.array(), cb.arrayOffset(), cb.length());
}
diff --git a/java/org/apache/tomcat/util/buf/StringCache.java b/java/org/apache/tomcat/util/buf/StringCache.java
index d39de93cfa..5b82e44f74 100644
--- a/java/org/apache/tomcat/util/buf/StringCache.java
+++ b/java/org/apache/tomcat/util/buf/StringCache.java
@@ -16,10 +16,13 @@
*/
package org.apache.tomcat.util.buf;
+import java.nio.charset.CharacterCodingException;
import java.nio.charset.Charset;
+import java.nio.charset.CodingErrorAction;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Map.Entry;
+import java.util.Objects;
import java.util.TreeMap;
import org.apache.juli.logging.Log;
@@ -208,11 +211,22 @@ public class StringCache {
public static String toString(ByteChunk bc) {
+ try {
+ return toString(bc, CodingErrorAction.REPLACE, CodingErrorAction.REPLACE);
+ } catch (CharacterCodingException e) {
+ // Unreachable code. Use of REPLACE above means the exception will never be thrown.
+ throw new IllegalStateException(e);
+ }
+ }
+
+
+ public static String toString(ByteChunk bc, CodingErrorAction malformedInputAction,
+ CodingErrorAction unmappableCharacterAction) throws CharacterCodingException {
// If the cache is null, then either caching is disabled, or we're
// still training
if (bcCache == null) {
- String value = bc.toStringInternal();
+ String value = bc.toStringInternal(malformedInputAction, unmappableCharacterAction);
if (byteEnabled && (value.length() < maxStringSize)) {
// If training, everything is synced
synchronized (bcStats) {
@@ -285,6 +299,8 @@ public class StringCache {
System.arraycopy(bc.getBuffer(), start, entry.name, 0, end - start);
// Set encoding
entry.charset = bc.getCharset();
+ entry.malformedInputAction = malformedInputAction;
+ entry.unmappableCharacterAction = unmappableCharacterAction;
// Initialize occurrence count to one
count = new int[1];
count[0] = 1;
@@ -300,9 +316,9 @@ public class StringCache {
} else {
accessCount++;
// Find the corresponding String
- String result = find(bc);
+ String result = find(bc, malformedInputAction, unmappableCharacterAction);
if (result == null) {
- return bc.toStringInternal();
+ return bc.toStringInternal(malformedInputAction, unmappableCharacterAction);
}
// Note: We don't care about safety for the stats
hitCount++;
@@ -462,10 +478,31 @@ public class StringCache {
* @param name The name to find
*
* @return the corresponding value
+ *
+ * @deprecated Unused. Will be removed in Tomcat 11.
+ * Use {@link #find(ByteChunk, CodingErrorAction, CodingErrorAction)}
*/
+ @Deprecated
protected static final String find(ByteChunk name) {
+ return find(name, CodingErrorAction.REPLACE, CodingErrorAction.REPLACE);
+ }
+
+
+ /**
+ * Find an entry given its name in the cache and return the associated String.
+ *
+ * @param name The name to find
+ * @param malformedInputAction Action to take if an malformed input is encountered
+ * @param unmappableCharacterAction Action to take if an unmappable character is encountered
+ *
+ * @return the corresponding value
+ */
+ protected static final String find(ByteChunk name, CodingErrorAction malformedInputAction,
+ CodingErrorAction unmappableCharacterAction) {
int pos = findClosest(name, bcCache, bcCache.length);
- if ((pos < 0) || (compare(name, bcCache[pos].name) != 0) || !(name.getCharset().equals(bcCache[pos].charset))) {
+ if ((pos < 0) || (compare(name, bcCache[pos].name) != 0) || !(name.getCharset().equals(bcCache[pos].charset)) ||
+ !malformedInputAction.equals(bcCache[pos].malformedInputAction) ||
+ !unmappableCharacterAction.equals(bcCache[pos].unmappableCharacterAction)) {
return null;
} else {
return bcCache[pos].value;
@@ -631,11 +668,12 @@ public class StringCache {
// -------------------------------------------------- ByteEntry Inner Class
-
private static class ByteEntry {
private byte[] name = null;
private Charset charset = null;
+ private CodingErrorAction malformedInputAction = null;
+ private CodingErrorAction unmappableCharacterAction = null;
private String value = null;
@Override
@@ -645,17 +683,25 @@ public class StringCache {
@Override
public int hashCode() {
- return value.hashCode();
+ return Objects.hash(malformedInputAction, unmappableCharacterAction, value);
}
@Override
public boolean equals(Object obj) {
- if (obj instanceof ByteEntry) {
- return value.equals(((ByteEntry) obj).value);
+ if (this == obj) {
+ return true;
}
- return false;
+ if (obj == null) {
+ return false;
+ }
+ if (getClass() != obj.getClass()) {
+ return false;
+ }
+ ByteEntry other = (ByteEntry) obj;
+ return Objects.equals(malformedInputAction, other.malformedInputAction) &&
+ Objects.equals(unmappableCharacterAction, other.unmappableCharacterAction) &&
+ Objects.equals(value, other.value);
}
-
}
diff --git a/test/org/apache/jasper/compiler/TestGenerator.java b/test/org/apache/jasper/compiler/TestGenerator.java
index 82921eb8fe..b8bd60a880 100644
--- a/test/org/apache/jasper/compiler/TestGenerator.java
+++ b/test/org/apache/jasper/compiler/TestGenerator.java
@@ -22,6 +22,7 @@ import java.beans.PropertyDescriptor;
import java.beans.PropertyEditorSupport;
import java.io.File;
import java.io.IOException;
+import java.nio.charset.CodingErrorAction;
import java.util.Date;
import java.util.Scanner;
@@ -211,7 +212,7 @@ public class TestGenerator extends TomcatBaseTest {
ByteChunk bc = new ByteChunk();
int rc = getUrl("http://localhost:" + getPort() + "/test/bug5nnnn/bug56529.jsp", bc, null);
Assert.assertEquals(HttpServletResponse.SC_OK, rc);
- String response = bc.toStringInternal();
+ String response = bc.toStringInternal(CodingErrorAction.REPORT, CodingErrorAction.REPORT);
Assert.assertTrue(response, response.contains("[1:attribute1: '', attribute2: '']"));
Assert.assertTrue(response, response.contains("[2:attribute1: '', attribute2: '']"));
}
diff --git a/test/org/apache/tomcat/util/buf/TestStringCache.java b/test/org/apache/tomcat/util/buf/TestStringCache.java
new file mode 100644
index 0000000000..2345d2de76
--- /dev/null
+++ b/test/org/apache/tomcat/util/buf/TestStringCache.java
@@ -0,0 +1,100 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tomcat.util.buf;
+
+import java.nio.charset.CharacterCodingException;
+import java.nio.charset.CodingErrorAction;
+import java.nio.charset.StandardCharsets;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+public class TestStringCache {
+
+ private static final byte[] BYTES_VALID = new byte[] { 65, 66, 67, 68};
+ private static final byte[] BYTES_INVALID = new byte[] {65, 66, -1, 67, 68};
+
+ private static final ByteChunk INPUT_VALID = new ByteChunk();
+ private static final ByteChunk INPUT_INVALID = new ByteChunk();
+
+ private static final CodingErrorAction[] actions =
+ new CodingErrorAction[] { CodingErrorAction.IGNORE, CodingErrorAction.REPLACE, CodingErrorAction.REPORT };
+
+ static {
+ INPUT_VALID.setBytes(BYTES_VALID, 0, BYTES_VALID.length);
+ INPUT_VALID.setCharset(StandardCharsets.UTF_8);
+ INPUT_INVALID.setBytes(BYTES_INVALID, 0, BYTES_INVALID.length);
+ INPUT_INVALID.setCharset(StandardCharsets.UTF_8);
+ }
+
+
+ @Test
+ public void testCodingErrorLookup() {
+
+ System.setProperty("tomcat.util.buf.StringCache.byte.enabled", "true");
+
+ Assert.assertTrue(StringCache.byteEnabled);
+ StringCache sc = new StringCache();
+ sc.reset();
+
+ for (int i = 0; i < StringCache.trainThreshold * 2; i++) {
+ for (CodingErrorAction malformedInputAction : actions) {
+ try {
+ // UTF-8 doesn't have any unmappable characters
+ INPUT_VALID.toString(malformedInputAction, CodingErrorAction.IGNORE);
+ INPUT_INVALID.toString(malformedInputAction, CodingErrorAction.IGNORE);
+ } catch (CharacterCodingException e) {
+ // Ignore
+ }
+ }
+ }
+
+ Assert.assertNotNull(StringCache.bcCache);
+
+ // Check the valid input is cached correctly
+ for (CodingErrorAction malformedInputAction : actions) {
+ try {
+ Assert.assertEquals("ABCD", INPUT_VALID.toString(malformedInputAction, CodingErrorAction.IGNORE));
+ } catch (CharacterCodingException e) {
+ // Should not happen for valid input
+ Assert.fail();
+ }
+ }
+
+ // Check the valid input is cached correctly
+ try {
+ Assert.assertEquals("ABCD", INPUT_INVALID.toString(CodingErrorAction.IGNORE, CodingErrorAction.IGNORE));
+ } catch (CharacterCodingException e) {
+ // Should not happen for invalid input with IGNORE
+ Assert.fail();
+ }
+ try {
+ Assert.assertEquals("AB\ufffdCD", INPUT_INVALID.toString(CodingErrorAction.REPLACE, CodingErrorAction.IGNORE));
+ } catch (CharacterCodingException e) {
+ // Should not happen for invalid input with REPLACE
+ Assert.fail();
+ }
+ try {
+ Assert.assertEquals("ABCD", INPUT_INVALID.toString(CodingErrorAction.REPORT, CodingErrorAction.IGNORE));
+ // Should throw exception
+ Assert.fail();
+ } catch (CharacterCodingException e) {
+ // Ignore
+ }
+
+ }
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscribe@tomcat.apache.org
For additional commands, e-mail: dev-help@tomcat.apache.org