You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@tomcat.apache.org by ma...@apache.org on 2019/03/14 13:23:39 UTC
[tomcat] branch 8.5.x updated: Fix
https://bz.apache.org/bugzilla/show_bug.cgi?id=63235
This is an automated email from the ASF dual-hosted git repository.
markt pushed a commit to branch 8.5.x
in repository https://gitbox.apache.org/repos/asf/tomcat.git
The following commit(s) were added to refs/heads/8.5.x by this push:
new b10ceba Fix https://bz.apache.org/bugzilla/show_bug.cgi?id=63235
b10ceba is described below
commit b10ceba2f88f8ebec7e5479a66fb5b689ff46dfa
Author: Mark Thomas <ma...@apache.org>
AuthorDate: Fri Mar 8 20:45:38 2019 +0000
Fix https://bz.apache.org/bugzilla/show_bug.cgi?id=63235
Implement an alternative Charset caching strategy that does not load all
Charsets at Tomcat start. This reduces start time by ~30ms and does not,
based on the performance tests included in this commit, have a negative
impact on runtime look-ups.
It does require that the names of all supported Charsets are known to
Tomcat at compile time. The code has been tested with a range of JVMs
and a unit test is provided for testing new JVMs.
---
java/org/apache/tomcat/util/buf/B2CConverter.java | 21 +--
java/org/apache/tomcat/util/buf/CharsetCache.java | 134 ++++++++++++++++++++
.../apache/tomcat/util/buf/TestCharsetCache.java | 65 ++++++++++
.../util/buf/TestCharsetCachePerformance.java | 141 +++++++++++++++++++++
webapps/docs/changelog.xml | 3 +
5 files changed, 348 insertions(+), 16 deletions(-)
diff --git a/java/org/apache/tomcat/util/buf/B2CConverter.java b/java/org/apache/tomcat/util/buf/B2CConverter.java
index ef8e706..2dd63c0 100644
--- a/java/org/apache/tomcat/util/buf/B2CConverter.java
+++ b/java/org/apache/tomcat/util/buf/B2CConverter.java
@@ -25,9 +25,7 @@ import java.nio.charset.CharsetDecoder;
import java.nio.charset.CoderResult;
import java.nio.charset.CodingErrorAction;
import java.nio.charset.StandardCharsets;
-import java.util.HashMap;
import java.util.Locale;
-import java.util.Map;
import org.apache.tomcat.util.res.StringManager;
@@ -39,23 +37,14 @@ public class B2CConverter {
private static final StringManager sm =
StringManager.getManager(Constants.Package);
- private static final Map<String, Charset> encodingToCharsetCache =
- new HashMap<>();
-
- // Protected so unit tests can use it
- protected static final int LEFTOVER_SIZE = 9;
+ private static final CharsetCache charsetCache;
static {
- for (Charset charset: Charset.availableCharsets().values()) {
- encodingToCharsetCache.put(
- charset.name().toLowerCase(Locale.ENGLISH), charset);
- for (String alias : charset.aliases()) {
- encodingToCharsetCache.put(
- alias.toLowerCase(Locale.ENGLISH), charset);
- }
- }
+ charsetCache = new CharsetCache();
}
+ // Protected so unit tests can use it
+ protected static final int LEFTOVER_SIZE = 9;
/**
* Obtain the Charset for the given encoding
@@ -93,7 +82,7 @@ public class B2CConverter {
public static Charset getCharsetLower(String lowerCaseEnc)
throws UnsupportedEncodingException {
- Charset charset = encodingToCharsetCache.get(lowerCaseEnc);
+ Charset charset = charsetCache.getCharset(lowerCaseEnc);
if (charset == null) {
// Pre-population of the cache means this must be invalid
diff --git a/java/org/apache/tomcat/util/buf/CharsetCache.java b/java/org/apache/tomcat/util/buf/CharsetCache.java
new file mode 100644
index 0000000..bde4583
--- /dev/null
+++ b/java/org/apache/tomcat/util/buf/CharsetCache.java
@@ -0,0 +1,134 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tomcat.util.buf;
+
+import java.nio.charset.Charset;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.CharsetEncoder;
+import java.util.Locale;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.ConcurrentMap;
+
+public class CharsetCache {
+
+ private static final String[] INITIAL_CHARSETS = new String[] { "iso-8859-1", "utf-8" };
+
+ /*
+ * Tested with:
+ * - Oracle JDK 8 u192
+ * - OpenJDK 13 EA 4
+ */
+ private static final String[] LAZY_CHARSETS = new String[] {
+ "big5", "big5-hkscs", "cesu-8", "euc-jp", "euc-kr", "gb18030", "gb2312", "gbk", "ibm-thai", "ibm00858",
+ "ibm01140", "ibm01141", "ibm01142", "ibm01143", "ibm01144", "ibm01145", "ibm01146", "ibm01147", "ibm01148",
+ "ibm01149", "ibm037", "ibm1026", "ibm1047", "ibm273", "ibm277", "ibm278", "ibm280", "ibm284", "ibm285",
+ "ibm290", "ibm297", "ibm420", "ibm424", "ibm437", "ibm500", "ibm775", "ibm850", "ibm852", "ibm855",
+ "ibm857", "ibm860", "ibm861", "ibm862", "ibm863", "ibm864", "ibm865", "ibm866", "ibm868", "ibm869",
+ "ibm870", "ibm871", "ibm918", "iso-2022-cn", "iso-2022-jp", "iso-2022-jp-2", "iso-2022-kr", "iso-8859-13",
+ "iso-8859-15", "iso-8859-2", "iso-8859-3", "iso-8859-4", "iso-8859-5", "iso-8859-6", "iso-8859-7",
+ "iso-8859-8", "iso-8859-9", "iso-8859-16", "jis_x0201", "jis_x0212-1990", "koi8-r", "koi8-u", "shift_jis",
+ "tis-620", "us-ascii", "utf-16", "utf-16be", "utf-16le", "utf-32", "utf-32be", "utf-32le", "x-utf-32be-bom",
+ "x-utf-32le-bom", "windows-1250", "windows-1251", "windows-1252", "windows-1253", "windows-1254",
+ "windows-1255", "windows-1256", "windows-1257", "windows-1258", "windows-31j", "x-big5-hkscs-2001",
+ "x-big5-solaris", "x-compound_text", "x-euc-tw", "x-ibm1006", "x-ibm1025", "x-ibm1046", "x-ibm1097",
+ "x-ibm1098", "x-ibm1112", "x-ibm1122", "x-ibm1123", "x-ibm1124", "x-ibm1129", "x-ibm1166", "x-ibm1364",
+ "x-ibm1381", "x-ibm1383", "x-ibm300", "x-ibm33722", "x-ibm737", "x-ibm833", "x-ibm834", "x-ibm856",
+ "x-ibm874", "x-ibm875", "x-ibm921", "x-ibm922", "x-ibm930", "x-ibm933", "x-ibm935", "x-ibm937", "x-ibm939",
+ "x-ibm942", "x-ibm942c", "x-ibm943", "x-ibm943c", "x-ibm948", "x-ibm949", "x-ibm949c", "x-ibm950",
+ "x-ibm964", "x-ibm970", "x-iscii91", "x-iso-2022-cn-cns", "x-iso-2022-cn-gb", "x-jis0208",
+ "x-jisautodetect", "x-johab", "x-ms932_0213", "x-ms950-hkscs", "x-ms950-hkscs-xp", "x-macarabic",
+ "x-maccentraleurope", "x-maccroatian", "x-maccyrillic", "x-macdingbat", "x-macgreek", "x-machebrew",
+ "x-maciceland", "x-macroman", "x-macromania", "x-macsymbol", "x-macthai", "x-macturkish", "x-macukraine",
+ "x-pck", "x-sjis_0213", "x-utf-16le-bom", "x-euc-jp-linux", "x-eucjp-open", "x-iso-8859-11", "x-mswin-936",
+ "x-windows-50220", "x-windows-50221", "x-windows-874", "x-windows-949", "x-windows-950",
+ "x-windows-iso2022jp"
+ };
+
+ private static final Charset DUMMY_CHARSET = new DummyCharset("Dummy", null);
+
+ private ConcurrentMap<String,Charset> cache = new ConcurrentHashMap<>();
+
+ public CharsetCache() {
+ // Pre-populate the cache
+ for (String charsetName : INITIAL_CHARSETS) {
+ Charset charset = Charset.forName(charsetName);
+ addToCache(charsetName, charset);
+ }
+
+ for (String charsetName : LAZY_CHARSETS) {
+ addToCache(charsetName, DUMMY_CHARSET);
+ }
+ }
+
+
+ private void addToCache(String name, Charset charset) {
+ cache.put(name, charset);
+ for (String alias : charset.aliases()) {
+ cache.put(alias.toLowerCase(Locale.ENGLISH), charset);
+ }
+ }
+
+
+ public Charset getCharset(String charsetName) {
+ String lcCharsetName = charsetName.toLowerCase(Locale.ENGLISH);
+
+ Charset result = cache.get(lcCharsetName);
+
+ if (result == DUMMY_CHARSET) {
+ // Name is known but the Charset is not in the cache
+ Charset charset = Charset.forName(lcCharsetName);
+ if (charset == null) {
+ // Charset not available in this JVM - remove cache entry
+ cache.remove(lcCharsetName);
+ result = null;
+ } else {
+ // Charset is available - populate cache entry
+ addToCache(lcCharsetName, charset);
+ result = charset;
+ }
+ }
+
+ return result;
+ }
+
+
+ /*
+ * Placeholder Charset implementation for entries that will be loaded lazily
+ * into the cache.
+ */
+ private static class DummyCharset extends Charset {
+
+ protected DummyCharset(String canonicalName, String[] aliases) {
+ super(canonicalName, aliases);
+ }
+
+ @Override
+ public boolean contains(Charset cs) {
+ return false;
+ }
+
+ @Override
+ public CharsetDecoder newDecoder() {
+ return null;
+ }
+
+ @Override
+ public CharsetEncoder newEncoder() {
+ return null;
+ }
+ }
+}
diff --git a/test/org/apache/tomcat/util/buf/TestCharsetCache.java b/test/org/apache/tomcat/util/buf/TestCharsetCache.java
new file mode 100644
index 0000000..011b08c
--- /dev/null
+++ b/test/org/apache/tomcat/util/buf/TestCharsetCache.java
@@ -0,0 +1,65 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tomcat.util.buf;
+
+import java.nio.charset.Charset;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Locale;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+public class TestCharsetCache {
+
+ @Test
+ public void testAllKnownCharsets() {
+ CharsetCache cache = new CharsetCache();
+
+ List<String> cacheMisses = new ArrayList<>();
+
+ for (Charset charset: Charset.availableCharsets().values()) {
+ if (cache.getCharset(charset.name()) == null) {
+ cacheMisses.add(charset.name());
+ } else {
+ for (String alias : charset.aliases()) {
+ if (cache.getCharset(alias) == null) {
+ cacheMisses.add(alias);
+ }
+ }
+ }
+ }
+
+ if (cacheMisses.size() != 0) {
+ StringBuilder sb = new StringBuilder();
+ Collections.sort(cacheMisses);
+ for (String name : cacheMisses) {
+ if (sb.length() == 0) {
+ sb.append('"');
+ } else {
+ sb.append(", \"");
+ }
+ sb.append(name.toLowerCase(Locale.ENGLISH));
+ sb.append('"');
+ }
+ System.out.println(sb.toString());
+ }
+
+ Assert.assertTrue(cacheMisses.size() == 0);
+ }
+}
diff --git a/test/org/apache/tomcat/util/buf/TestCharsetCachePerformance.java b/test/org/apache/tomcat/util/buf/TestCharsetCachePerformance.java
new file mode 100644
index 0000000..6823890
--- /dev/null
+++ b/test/org/apache/tomcat/util/buf/TestCharsetCachePerformance.java
@@ -0,0 +1,141 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tomcat.util.buf;
+
+import java.nio.charset.Charset;
+import java.util.HashMap;
+import java.util.Locale;
+import java.util.Map;
+
+import org.junit.Test;
+
+public class TestCharsetCachePerformance {
+
+ @Test
+ public void testNoCsCache() throws Exception {
+ doTest(new NoCsCache());
+ }
+
+
+ @Test
+ public void testFullCsCache() throws Exception {
+ doTest(new FullCsCache());
+ }
+
+
+ @Test
+ public void testLazyCsCache() throws Exception {
+ doTest(new LazyCsCache());
+ }
+
+
+ private void doTest(CsCache cache) throws Exception {
+ int threadCount = 10;
+ int iterations = 10000000;
+ String[] lookupNames = new String[] {
+ "ISO-8859-1", "ISO-8859-2", "ISO-8859-3", "ISO-8859-4", "ISO-8859-5" };
+
+ Thread[] threads = new Thread[threadCount];
+
+ for (int i = 0; i < threadCount; i++) {
+ threads[i] = new TestCsCacheThread(iterations, cache, lookupNames);
+ }
+
+ long startTime = System.nanoTime();
+
+ for (int i = 0; i < threadCount; i++) {
+ threads[i].start();
+ }
+
+ for (int i = 0; i < threadCount; i++) {
+ threads[i].join();
+ }
+
+ long endTime = System.nanoTime();
+
+ System.out.println(cache.getClass().getName() + ": " + (endTime - startTime) + "ns");
+ }
+
+
+ private static interface CsCache {
+ Charset getCharset(String charsetName);
+ }
+
+
+ private static class NoCsCache implements CsCache {
+
+ @Override
+ public Charset getCharset(String charsetName) {
+ return Charset.forName(charsetName);
+ }
+ }
+
+
+ private static class FullCsCache implements CsCache {
+
+ private static final Map<String,Charset> cache = new HashMap<>();
+
+ static {
+ for (Charset charset: Charset.availableCharsets().values()) {
+ cache.put(charset.name().toLowerCase(Locale.ENGLISH), charset);
+ for (String alias : charset.aliases()) {
+ cache.put(alias.toLowerCase(Locale.ENGLISH), charset);
+ }
+ }
+ }
+
+
+ @Override
+ public Charset getCharset(String charsetName) {
+ return cache.get(charsetName.toLowerCase(Locale.ENGLISH));
+ }
+ }
+
+
+ private static class LazyCsCache implements CsCache {
+
+ private CharsetCache cache = new CharsetCache();
+
+ @Override
+ public Charset getCharset(String charsetName) {
+ return cache.getCharset(charsetName);
+ }
+ }
+
+
+ private static class TestCsCacheThread extends Thread {
+
+ private final int iterations;
+ private final CsCache cache;
+ private final String[] lookupNames;
+ private final int lookupNamesCount;
+
+ public TestCsCacheThread(int iterations, CsCache cache, String[] lookupNames) {
+ this.iterations = iterations;
+ this.cache = cache;
+ this.lookupNames = lookupNames;
+ this.lookupNamesCount = lookupNames.length;
+ }
+
+ @Override
+ public void run() {
+ for (int i = 0; i < iterations; i++) {
+ cache.getCharset(lookupNames[i % lookupNamesCount]);
+ }
+ }
+ }
+}
diff --git a/webapps/docs/changelog.xml b/webapps/docs/changelog.xml
index 38e2884..4dad208 100644
--- a/webapps/docs/changelog.xml
+++ b/webapps/docs/changelog.xml
@@ -53,6 +53,9 @@
<code>RemoteIpFilter</code> and <code>RemoteIpValve</code>. (markt)
</fix>
<fix>
+ <bug>63235</bug>: Refactor Charset cache to reduce start time. (markt)
+ </fix>
+ <fix>
<bug>63249</bug>: Use a consistent log level (<code>WARN</code>) when
logging the failure to register or deregister a JMX Bean. (markt)
</fix>
---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscribe@tomcat.apache.org
For additional commands, e-mail: dev-help@tomcat.apache.org