You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ju...@apache.org on 2008/05/05 20:15:10 UTC

svn commit: r653548 - in /incubator/tika/trunk/src: main/java/org/apache/tika/mime/MediaType.java main/java/org/apache/tika/mime/MediaTypeRegistry.java test/java/org/apache/tika/mime/MediaTypeTest.java

Author: jukka
Date: Mon May  5 11:15:10 2008
New Revision: 653548

URL: http://svn.apache.org/viewvc?rev=653548&view=rev
Log:
TIKA-87: MimeTypes should allow modification of MIME types
TIKA-89: Rename MimeType and MimeTypes
    - Trying to decouple the MIME type registry from Tika configuration
    - Work in progress

Added:
    incubator/tika/trunk/src/main/java/org/apache/tika/mime/MediaType.java
    incubator/tika/trunk/src/main/java/org/apache/tika/mime/MediaTypeRegistry.java
    incubator/tika/trunk/src/test/java/org/apache/tika/mime/MediaTypeTest.java

Added: incubator/tika/trunk/src/main/java/org/apache/tika/mime/MediaType.java
URL: http://svn.apache.org/viewvc/incubator/tika/trunk/src/main/java/org/apache/tika/mime/MediaType.java?rev=653548&view=auto
==============================================================================
--- incubator/tika/trunk/src/main/java/org/apache/tika/mime/MediaType.java (added)
+++ incubator/tika/trunk/src/main/java/org/apache/tika/mime/MediaType.java Mon May  5 11:15:10 2008
@@ -0,0 +1,196 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.mime;
+
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Locale;
+import java.util.Map;
+import java.util.SortedMap;
+import java.util.TreeMap;
+import java.util.regex.Pattern;
+
+/**
+ * Internet media type.
+ */
+public final class MediaType {
+
+    private static final Map<String, String> NO_PARAMETERS =
+        Collections.emptyMap();
+
+    private static final Pattern SPECIAL =
+        Pattern.compile("[\\(\\)<>@,;:\\\\\"/\\[\\]\\?=]");
+
+    private static final Pattern SPECIAL_OR_WHITESPACE =
+        Pattern.compile("[\\(\\)<>@,;:\\\\\"/\\[\\]\\?=\\s]");
+
+    public static final MediaType OCTET_STREAM =
+        new MediaType("application", "octet-stream", NO_PARAMETERS);
+
+    public static final MediaType TEXT_PLAIN =
+        new MediaType("text", "plain", NO_PARAMETERS);
+
+    public static final MediaType APPLICATION_XML =
+        new MediaType("application", "xml", NO_PARAMETERS);
+
+    /**
+     * Parses the given string to a media type. The string is expected
+     * to be of the form "type/subtype(; parameter=...)*" as defined
+     * in RFC 2045.
+     * <p>
+     * Note that currently this method only parses the "type/subtype" part
+     * of the string. Any parameters are simply discarded.
+     *
+     * @param string media type string to be parsed
+     * @return parsed media type
+     * @throws IllegalArgumentException if the string is not a media type
+     */
+    public static MediaType parse(String string) {
+        int slash = string.indexOf('/');
+        if (slash == -1) {
+            throw new IllegalArgumentException("Invalid media type: " + string);
+        }
+
+        String type = string.substring(0, slash);
+        String subtype = string.substring(slash + 1);
+        // String parameters = "";
+
+        int colon = subtype.indexOf(';');
+        if (colon != -1) {
+            // parameters = subtype.substring(colon + 1);
+            subtype = subtype.substring(0, colon);
+        }
+
+        return new MediaType(type, subtype);
+    }
+
+    private final String type;
+
+    private final String subtype;
+
+    private final SortedMap<String, String> parameters;
+
+    public MediaType(
+            String type, String subtype, Map<String, String> parameters) {
+        this.type = type.trim().toLowerCase();
+        this.subtype = subtype.trim().toLowerCase();
+        this.parameters = new TreeMap<String, String>();
+        for (Map.Entry<String, String> entry : parameters.entrySet()) {
+            this.parameters.put(
+                    entry.getKey().trim().toLowerCase(), entry.getValue());
+        }
+    }
+
+    public MediaType(String type, String subtype) {
+        this(type, subtype, NO_PARAMETERS);
+    }
+
+    private static Map<String, String> union(
+            Map<String, String> a, Map<String, String> b) {
+        if (a.isEmpty()) {
+            return b;
+        } else if (b.isEmpty()) {
+            return a;
+        } else {
+            Map<String, String> union = new HashMap<String, String>();
+            union.putAll(a);
+            union.putAll(b);
+            return union;
+        }
+    }
+
+    public MediaType(MediaType type, Map<String, String> parameters) {
+        this(type.type, type.subtype, union(type.parameters, parameters));
+    }
+
+    public MediaType getBaseType() {
+        if (parameters.isEmpty()) {
+            return this;
+        } else {
+            return new MediaType(type, subtype);
+        }
+    }
+
+    public String getType() {
+        return type;
+    }
+
+    public String getSubtype() {
+        return subtype;
+    }
+
+    public Map<String, String> getParameters() {
+        return Collections.unmodifiableMap(parameters);
+    }
+
+    public boolean isSpecializationOf(MediaType that) {
+        if (OCTET_STREAM.equals(that)) {
+            return true;
+        } else if (!type.equals(that.type)) {
+            return false;
+        } else if (!parameters.entrySet().containsAll(that.parameters.entrySet())) {
+            return false;
+        } else if (TEXT_PLAIN.equals(that.getBaseType())) {
+            return true;
+        } else if (APPLICATION_XML.equals(that.getBaseType())
+                && subtype.endsWith("+xml")) {
+            return true;
+        } else {
+            return subtype.equals(that.subtype);
+        }
+    }
+
+    public String toString() {
+        StringBuilder builder = new StringBuilder();
+        builder.append(type);
+        builder.append('/');
+        builder.append(subtype);
+        for (Map.Entry<String, String> entry : parameters.entrySet()) {
+            builder.append("; ");
+            builder.append(entry.getKey());
+            builder.append("=");
+            String value = entry.getValue();
+            if (SPECIAL_OR_WHITESPACE.matcher(value).find()) {
+                builder.append('"');
+                builder.append(SPECIAL.matcher(value).replaceAll("\\\\$0"));
+                builder.append('"');
+            } else {
+                builder.append(value);
+            }
+        }
+        return builder.toString();
+    }
+
+    public boolean equals(Object object) {
+        if (object instanceof MediaType) {
+            MediaType that = (MediaType) object;
+            return type.equals(that.type)
+                && subtype.equals(that.subtype)
+                && parameters.equals(that.parameters);
+        } else {
+            return false;
+        }
+    }
+
+    public int hashCode() {
+        int hash = 17;
+        hash = hash * 31 + type.hashCode();
+        hash = hash * 31 + subtype.hashCode();
+        hash = hash * 31 + parameters.hashCode();
+        return hash;
+    }
+
+}

Added: incubator/tika/trunk/src/main/java/org/apache/tika/mime/MediaTypeRegistry.java
URL: http://svn.apache.org/viewvc/incubator/tika/trunk/src/main/java/org/apache/tika/mime/MediaTypeRegistry.java?rev=653548&view=auto
==============================================================================
--- incubator/tika/trunk/src/main/java/org/apache/tika/mime/MediaTypeRegistry.java (added)
+++ incubator/tika/trunk/src/main/java/org/apache/tika/mime/MediaTypeRegistry.java Mon May  5 11:15:10 2008
@@ -0,0 +1,43 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.mime;
+
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * Registry of Internet media types.
+ */
+public final class MediaTypeRegistry {
+
+    /** Maps each registered alias to its canonical media type. */
+    private final Map<MediaType, MediaType> aliases =
+        new HashMap<MediaType, MediaType>();
+
+    /**
+     * Registers an alias of a canonical media type. A previously
+     * registered mapping for the same alias is replaced.
+     * <p>
+     * Note that {@link #unalias(MediaType)} looks aliases up by their
+     * parameterless base type, so an alias that carries parameters is
+     * never matched.
+     *
+     * @param canonical canonical media type
+     * @param alias alias of the canonical media type
+     */
+    public void addAlias(MediaType canonical, MediaType alias) {
+        aliases.put(alias, canonical);
+    }
+
+    /**
+     * Replaces an aliased media type with its canonical form. The lookup
+     * uses the base type of the given media type; the parameters of the
+     * given type are carried over to the returned canonical type.
+     *
+     * @param type media type that may be an alias
+     * @return the canonical type combined with the parameters of the
+     *         given type, or the given type itself if its base type is
+     *         not a registered alias
+     */
+    public MediaType unalias(MediaType type) {
+        MediaType target = aliases.get(type.getBaseType());
+        if (target == null) {
+            return type;
+        }
+        return new MediaType(target, type.getParameters());
+    }
+
+}

Added: incubator/tika/trunk/src/test/java/org/apache/tika/mime/MediaTypeTest.java
URL: http://svn.apache.org/viewvc/incubator/tika/trunk/src/test/java/org/apache/tika/mime/MediaTypeTest.java?rev=653548&view=auto
==============================================================================
--- incubator/tika/trunk/src/test/java/org/apache/tika/mime/MediaTypeTest.java (added)
+++ incubator/tika/trunk/src/test/java/org/apache/tika/mime/MediaTypeTest.java Mon May  5 11:15:10 2008
@@ -0,0 +1,111 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.mime;
+
+import java.util.HashMap;
+import java.util.Map;
+
+import junit.framework.TestCase;
+
+public class MediaTypeTest extends TestCase {
+
+    /** Checks the string form with zero, one and two parameters. */
+    public void testBasics() {
+        MediaType octetStream = new MediaType("application", "octet-stream");
+        assertEquals("application/octet-stream", octetStream.toString());
+
+        MediaType textPlain = new MediaType("text", "plain");
+        assertEquals("text/plain", textPlain.toString());
+
+        Map<String, String> params = new HashMap<String, String>();
+        MediaType noParams = new MediaType("text", "plain", params);
+        assertEquals("text/plain", noParams.toString());
+
+        params.put("charset", "UTF-8");
+        MediaType oneParam = new MediaType("text", "plain", params);
+        assertEquals("text/plain; charset=UTF-8", oneParam.toString());
+
+        params.put("x-eol-style", "crlf");
+        MediaType twoParams = new MediaType("text", "plain", params);
+        assertEquals(
+                "text/plain; charset=UTF-8; x-eol-style=crlf",
+                twoParams.toString());
+    }
+
+    /**
+     * Checks that the type, the subtype and the parameter names are
+     * lower-cased while the parameter values are left untouched.
+     */
+    public void testLowerCase() {
+        MediaType upper = new MediaType("TEXT", "PLAIN");
+        assertEquals("text/plain", upper.toString());
+
+        MediaType capitalized = new MediaType("Text", "Plain");
+        assertEquals("text/plain", capitalized.toString());
+
+        Map<String, String> params = new HashMap<String, String>();
+        MediaType noParams = new MediaType("text", "PLAIN", params);
+        assertEquals("text/plain", noParams.toString());
+
+        params.put("CHARSET", "UTF-8");
+        MediaType oneParam = new MediaType("TEXT", "plain", params);
+        assertEquals("text/plain; charset=UTF-8", oneParam.toString());
+
+        params.put("X-Eol-Style", "crlf");
+        MediaType twoParams = new MediaType("TeXt", "PlAiN", params);
+        assertEquals(
+                "text/plain; charset=UTF-8; x-eol-style=crlf",
+                twoParams.toString());
+    }
+
+    /**
+     * Checks that surrounding whitespace is removed from the type, the
+     * subtype and the parameter names.
+     */
+    public void testTrim() {
+        MediaType spaces = new MediaType(" text ", " plain ");
+        assertEquals("text/plain", spaces.toString());
+
+        MediaType tabs = new MediaType("\ttext", "plain\t");
+        assertEquals("text/plain", tabs.toString());
+
+        Map<String, String> params = new HashMap<String, String>();
+        MediaType noParams = new MediaType("text\r\n", " \tplain", params);
+        assertEquals("text/plain", noParams.toString());
+
+        params.put(" charset", "UTF-8");
+        MediaType oneParam = new MediaType("\n\ntext", "plain \r", params);
+        assertEquals("text/plain; charset=UTF-8", oneParam.toString());
+
+        params.put("\r\n\tx-eol-style  \t", "crlf");
+        MediaType twoParams = new MediaType("    text", "\tplain ", params);
+        assertEquals(
+                "text/plain; charset=UTF-8; x-eol-style=crlf",
+                twoParams.toString());
+    }
+
+    /**
+     * Checks that parameter values containing whitespace or special
+     * characters are quoted and that special characters are escaped.
+     */
+    public void testQuote() {
+        Map<String, String> params = new HashMap<String, String>();
+        params.put("a", " value with spaces ");
+        params.put("b", "text/plain");
+        params.put("c", "()<>@,;:\\\"/[]?=");
+
+        String expected =
+            "text/plain; a=\" value with spaces \"; b=\"text\\/plain\""
+            + "; c=\"\\(\\)\\<\\>\\@\\,\\;\\:\\\\\\\"\\/\\[\\]\\?\\=\"";
+        MediaType quoted = new MediaType("text", "plain", params);
+        assertEquals(expected, quoted.toString());
+    }
+
+}