You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ju...@apache.org on 2011/11/11 12:25:16 UTC
svn commit: r1200813 -
/tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MediaType.java
Author: jukka
Date: Fri Nov 11 11:25:15 2011
New Revision: 1200813
URL: http://svn.apache.org/viewvc?rev=1200813&view=rev
Log:
TIKA-780: Optimize loading of the media type registry
Use a static map of normalized media type names to speed up type parsing.
Optimize the in-memory layout of MediaType for the common case of accessing the full canonical media type string instead of the separate type and subtype components.
Modified:
tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MediaType.java
Modified: tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MediaType.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MediaType.java?rev=1200813&r1=1200812&r2=1200813&view=diff
==============================================================================
--- tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MediaType.java (original)
+++ tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MediaType.java Fri Nov 11 11:25:15 2011
@@ -36,9 +36,6 @@ public final class MediaType implements
*/
private static final long serialVersionUID = -3831000556189036392L;
- private static final SortedMap<String, String> NO_PARAMETERS =
- Collections.unmodifiableSortedMap(new TreeMap<String, String>());
-
private static final Pattern SPECIAL =
Pattern.compile("[\\(\\)<>@,;:\\\\\"/\\[\\]\\?=]");
@@ -60,41 +57,50 @@ public final class MediaType implements
"(?is)\\s*(charset\\s*=\\s*[^\\c;\\s]+)\\s*;\\s*"
+ VALID_CHARS + "\\s*/\\s*" + VALID_CHARS + "\\s*");
- public static final MediaType OCTET_STREAM = application("octet-stream");
+ /**
+ * Map of basic types, keyed by their normalized "type/subtype" names.
+ * Used to optimize type lookup and to avoid having too many
+ * {@link MediaType} instances in memory.
+ */
+ private static final Map<String, MediaType> SIMPLE_TYPES =
+ new HashMap<String, MediaType>();
+
+ public static final MediaType OCTET_STREAM =
+ parse("application/octet-stream");
- public static final MediaType TEXT_PLAIN = text("plain");
+ public static final MediaType TEXT_PLAIN = parse("text/plain");
- public static final MediaType APPLICATION_XML = application("xml");
+ public static final MediaType APPLICATION_XML = parse("application/xml");
- public static final MediaType APPLICATION_ZIP = application("zip");
+ public static final MediaType APPLICATION_ZIP = parse("application/zip");
public static MediaType application(String type) {
- return new MediaType("application", type);
+ return MediaType.parse("application/" + type);
}
public static MediaType audio(String type) {
- return new MediaType("audio", type);
+ return MediaType.parse("audio/" + type);
}
public static MediaType image(String type) {
- return new MediaType("image", type);
+ return MediaType.parse("image/" + type);
}
public static MediaType text(String type) {
- return new MediaType("text", type);
+ return MediaType.parse("text/" + type);
}
public static MediaType video(String type) {
- return new MediaType("video", type);
+ return MediaType.parse("video/" + type);
}
/**
- * Parses the given string to a media type. The string is expected to be of
- * the form "type/subtype(; parameter=...)*" as defined in RFC 2045, though
- * we also handle "charset=xxx; type/subtype" for broken web servers.
- *
- * @param string
- * media type string to be parsed
+ * Parses the given string to a media type. The string is expected
+ * to be of the form "type/subtype(; parameter=...)*" as defined in
+ * RFC 2045, though we also handle "charset=xxx; type/subtype" for
+ * broken web servers.
+ *
+ * @param string media type string to be parsed
* @return parsed media type, or <code>null</code> if parsing fails
*/
public static MediaType parse(String string) {
@@ -102,16 +108,22 @@ public final class MediaType implements
return null;
}
- int slash = string.indexOf('/');
- if (slash == -1) {
- return null;
- }
-
- // Optimization for the common case
- String type = string.substring(0, slash);
- String subtype = string.substring(slash + 1);
- if (isValidName(type) && isValidName(subtype)) {
- return new MediaType(type, subtype);
+ // Optimization for the common cases
+ synchronized (SIMPLE_TYPES) {
+ MediaType type = SIMPLE_TYPES.get(string);
+ if (type == null) {
+ int slash = string.indexOf('/');
+ if (slash == -1) {
+ return null;
+ } else if (isSimpleName(string.substring(0, slash))
+ && isSimpleName(string.substring(slash + 1))) {
+ type = new MediaType(string, slash);
+ SIMPLE_TYPES.put(string, type);
+ }
+ }
+ if (type != null) {
+ return type;
+ }
}
Matcher matcher;
@@ -131,12 +143,11 @@ public final class MediaType implements
return null;
}
- private static boolean isValidName(String name) {
+ private static boolean isSimpleName(String name) {
for (int i = 0; i < name.length(); i++) {
char c = name.charAt(i);
if (c != '-' && c != '+' && c != '.' && c != '_'
&& !('0' <= c && c <= '9')
- && !('A' <= c && c <= 'Z')
&& !('a' <= c && c <= 'z')) {
return false;
}
@@ -146,7 +157,7 @@ public final class MediaType implements
private static Map<String, String> parseParameters(String string) {
if (string.length() == 0) {
- return NO_PARAMETERS;
+ return Collections.<String, String>emptyMap();
}
Map<String, String> parameters = new HashMap<String, String>();
@@ -176,33 +187,86 @@ public final class MediaType implements
return parameters;
}
- private final String type;
+ /**
+ * Canonical string representation of this media type.
+ */
+ private final String string;
- private final String subtype;
+ /**
+ * Location of the "/" character separating the type and the subtype
+ * tokens in {@link #string}.
+ */
+ private final int slash;
/**
- * Immutable map of media type parameters.
+ * Location of the first ";" character separating the type part of
+ * {@link #string} from possible parameters. Length of {@link #string}
+ * in case there are no parameters.
*/
- private final SortedMap<String, String> parameters;
+ private final int semicolon;
+
+ /**
+ * Immutable sorted map of media type parameters.
+ */
+ private final Map<String, String> parameters;
public MediaType(
String type, String subtype, Map<String, String> parameters) {
- this.type = type.trim().toLowerCase(Locale.ENGLISH);
- this.subtype = subtype.trim().toLowerCase(Locale.ENGLISH);
+ type = type.trim().toLowerCase(Locale.ENGLISH);
+ subtype = subtype.trim().toLowerCase(Locale.ENGLISH);
+
+ this.slash = type.length();
+ this.semicolon = slash + 1 + subtype.length();
+
if (parameters.isEmpty()) {
- this.parameters = NO_PARAMETERS;
+ this.parameters = Collections.emptyMap();
+ this.string = type + '/' + subtype;
} else {
+ StringBuilder builder = new StringBuilder();
+ builder.append(type);
+ builder.append('/');
+ builder.append(subtype);
+
SortedMap<String, String> map = new TreeMap<String, String>();
+ if (!(parameters instanceof SortedMap<?, ?>)) {
+ parameters = new TreeMap<String, String>(parameters);
+ }
for (Map.Entry<String, String> entry : parameters.entrySet()) {
- map.put(entry.getKey().trim().toLowerCase(Locale.ENGLISH),
- entry.getValue());
+ String key = entry.getKey().trim().toLowerCase(Locale.ENGLISH);
+ String value = entry.getValue();
+
+ map.put(key, value);
+
+ builder.append("; ");
+ builder.append(key);
+ builder.append("=");
+ if (SPECIAL_OR_WHITESPACE.matcher(value).find()) {
+ builder.append('"');
+ builder.append(SPECIAL.matcher(value).replaceAll("\\\\$0"));
+ builder.append('"');
+ } else {
+ builder.append(value);
+ }
}
+
+ this.string = builder.toString();
this.parameters = Collections.unmodifiableSortedMap(map);
}
}
public MediaType(String type, String subtype) {
- this(type, subtype, NO_PARAMETERS);
+ this(type, subtype, Collections.<String, String>emptyMap());
+ }
+
+ private MediaType(String string, int slash) {
+ assert slash != -1;
+ assert string.charAt(slash) == '/';
+ assert isSimpleName(string.substring(0, slash));
+ assert isSimpleName(string.substring(slash + 1));
+ this.string = string;
+ this.slash = slash;
+ this.semicolon = string.length();
+ this.parameters = Collections.emptyMap();
}
private static Map<String, String> union(
@@ -220,23 +284,24 @@ public final class MediaType implements
}
public MediaType(MediaType type, Map<String, String> parameters) {
- this(type.type, type.subtype, union(type.parameters, parameters));
+ this(type.getType(), type.getSubtype(),
+ union(type.parameters, parameters));
}
public MediaType getBaseType() {
if (parameters.isEmpty()) {
return this;
} else {
- return new MediaType(type, subtype);
+ return MediaType.parse(string.substring(0, semicolon));
}
}
public String getType() {
- return type;
+ return string.substring(0, slash);
}
public String getSubtype() {
- return subtype;
+ return string.substring(slash + 1, semicolon);
}
/**
@@ -261,47 +326,24 @@ public final class MediaType implements
}
public String toString() {
- StringBuilder builder = new StringBuilder();
- builder.append(type);
- builder.append('/');
- builder.append(subtype);
- for (Map.Entry<String, String> entry : parameters.entrySet()) {
- builder.append("; ");
- builder.append(entry.getKey());
- builder.append("=");
- String value = entry.getValue();
- if (SPECIAL_OR_WHITESPACE.matcher(value).find()) {
- builder.append('"');
- builder.append(SPECIAL.matcher(value).replaceAll("\\\\$0"));
- builder.append('"');
- } else {
- builder.append(value);
- }
- }
- return builder.toString();
+ return string;
}
public boolean equals(Object object) {
if (object instanceof MediaType) {
MediaType that = (MediaType) object;
- return type.equals(that.type)
- && subtype.equals(that.subtype)
- && parameters.equals(that.parameters);
+ return string.equals(that.string);
} else {
return false;
}
}
public int hashCode() {
- int hash = 17;
- hash = hash * 31 + type.hashCode();
- hash = hash * 31 + subtype.hashCode();
- hash = hash * 31 + parameters.hashCode();
- return hash;
+ return string.hashCode();
}
public int compareTo(MediaType that) {
- return toString().compareTo(that.toString());
+ return string.compareTo(that.string);
}
}