You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ju...@apache.org on 2010/04/28 15:05:14 UTC

svn commit: r938976 - in /lucene/tika/trunk: tika-core/src/main/java/org/apache/tika/config/ tika-core/src/main/java/org/apache/tika/mime/ tika-core/src/main/java/org/apache/tika/utils/ tika-parsers/src/test/java/org/apache/tika/ tika-parsers/src/test/java/org/apache/tika/mime/

Author: jukka
Date: Wed Apr 28 13:05:14 2010
New Revision: 938976

URL: http://svn.apache.org/viewvc?rev=938976&view=rev
Log:
TIKA-89: Rename MimeType and MimeTypes

Make MimeType use MediaType internally. The plan is to ultimately get rid of the MimeType class.

Modified:
    lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java
    lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MediaType.java
    lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MimeType.java
    lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MimeTypes.java
    lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MimeTypesReader.java
    lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/utils/ParseUtils.java
    lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/TestParsers.java
    lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/MimeTypeTest.java
    lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/MimeTypesTest.java
    lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/TestMimeTypes.java

Modified: lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java?rev=938976&r1=938975&r2=938976&view=diff
==============================================================================
--- lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java (original)
+++ lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java Wed Apr 28 13:05:14 2010
@@ -191,7 +191,7 @@ public class TikaConfig {
      * @param mimeType MIME type
      * @return configured Parser instance, or <code>null</code>
      */
-    public Parser getParser(String mimeType) {
+    public Parser getParser(MediaType mimeType) {
         return parsers.get(mimeType);
     }
 

Modified: lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MediaType.java
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MediaType.java?rev=938976&r1=938975&r2=938976&view=diff
==============================================================================
--- lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MediaType.java (original)
+++ lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MediaType.java Wed Apr 28 13:05:14 2010
@@ -1,4 +1,4 @@
-/**
+/*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
@@ -28,7 +28,7 @@ import java.util.regex.Pattern;
 /**
  * Internet media type.
  */
-public final class MediaType {
+public final class MediaType implements Comparable<MediaType> {
 
     private static final Map<String, String> NO_PARAMETERS =
         new TreeMap<String, String>();
@@ -254,4 +254,8 @@ public final class MediaType {
         return hash;
     }
 
+    public int compareTo(MediaType that) {
+        return toString().compareTo(that.toString());
+    }
+
 }

Modified: lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MimeType.java
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MimeType.java?rev=938976&r1=938975&r2=938976&view=diff
==============================================================================
--- lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MimeType.java (original)
+++ lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MimeType.java Wed Apr 28 13:05:14 2010
@@ -1,4 +1,4 @@
-/**
+/*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
@@ -67,9 +67,9 @@ public final class MimeType implements C
     private final MimeTypes registry;
 
     /**
-     * Lower case name of this media type.
+     * The normalized media type name.
      */
-    private final String name;
+    private final MediaType type;
 
     /**
      * Description of this media type.
@@ -99,17 +99,26 @@ public final class MimeType implements C
      * up to date.
      *
      * @param registry the media type registry that contains this type
-     * @param name media type name
+     * @param type normalized media type name
      */
-    MimeType(MimeTypes registry, String name) {
+    MimeType(MimeTypes registry, MediaType type) {
         if (registry == null) {
             throw new IllegalArgumentException("Registry is missing");
         }
-        if (!MimeType.isValid(name) || !name.equals(name.toLowerCase())) {
-            throw new IllegalArgumentException("Media type name is invalid");
+        if (type == null) {
+            throw new IllegalArgumentException("Media type name is missing");
         }
         this.registry = registry;
-        this.name = name;
+        this.type = type;
+    }
+
+    /**
+     * Returns the normalized media type name.
+     *
+     * @return media type
+     */
+    public MediaType getType() {
+        return type;
     }
 
     /**
@@ -118,7 +127,7 @@ public final class MimeType implements C
      * @return media type name (lower case)
      */
     public String getName() {
-        return name;
+        return type.toString();
     }
 
     /**
@@ -197,14 +206,9 @@ public final class MimeType implements C
      * @throws MimeTypeException if the alias is invalid or
      *                           already registered for another media type
      */
-    public void addAlias(String alias) throws MimeTypeException {
-        if (isValid(alias)) {
-            alias = alias.toLowerCase();
-            if (!name.equals(alias)) {
-                registry.addAlias(this, alias);
-            }
-        } else {
-            throw new MimeTypeException("Invalid media type alias: " + alias);
+    public void addAlias(MediaType alias) throws MimeTypeException {
+        if (!alias.isSpecializationOf(type)) {
+            registry.addAlias(this, alias);
         }
     }
 
@@ -347,22 +351,21 @@ public final class MimeType implements C
 
     //----------------------------------------------------------< Comparable >
 
-    public int compareTo(MimeType type) {
-        if (type == null) {
+    public int compareTo(MimeType mime) {
+        if (mime == null) {
             throw new IllegalArgumentException("MimeType is missing");
-        }
-        if (type == this) {
+        } else if (mime == this) {
             return 0;
-        } else if (this.isDescendantOf(type)) {
+        } else if (this.isDescendantOf(mime)) {
             return 1;
-        } else if (type.isDescendantOf(this)) {
+        } else if (mime.isDescendantOf(this)) {
             return -1;
         } else if (superType != null) {
-            return superType.compareTo(type);
-        } else if (type.superType != null) {
-            return compareTo(type.superType);
+            return superType.compareTo(mime);
+        } else if (mime.superType != null) {
+            return compareTo(mime.superType);
         } else {
-            return name.compareTo(type.name);
+            return type.compareTo(mime.type);
         }
     }
 
@@ -374,7 +377,7 @@ public final class MimeType implements C
      * @return media type name
      */
     public String toString() {
-        return name;
+        return type.toString();
     }
 
 }

Modified: lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MimeTypes.java
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MimeTypes.java?rev=938976&r1=938975&r2=938976&view=diff
==============================================================================
--- lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MimeTypes.java (original)
+++ lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MimeTypes.java Wed Apr 28 13:05:14 2010
@@ -119,7 +119,8 @@ public final class MimeTypes implements 
     private final MimeType xmlMimeType;
     
     /** All the registered MimeTypes indexed on their name */
-    private final Map<String, MimeType> types = new HashMap<String, MimeType>();
+    private final Map<MediaType, MimeType> types =
+        new HashMap<MediaType, MimeType>();
 
     /** The patterns matcher */
     private Patterns patterns = new Patterns();
@@ -133,9 +134,9 @@ public final class MimeTypes implements 
     private final XmlRootExtractor xmlRootExtractor;
 
     public MimeTypes() {
-        rootMimeType = new MimeType(this, OCTET_STREAM);
-        textMimeType = new MimeType(this, PLAIN_TEXT);
-        xmlMimeType = new MimeType(this, XML);
+        rootMimeType = new MimeType(this, MediaType.OCTET_STREAM);
+        textMimeType = new MimeType(this, MediaType.TEXT_PLAIN);
+        xmlMimeType = new MimeType(this, MediaType.APPLICATION_XML);
         
         try {
             textMimeType.setSuperType(rootMimeType);
@@ -144,9 +145,9 @@ public final class MimeTypes implements 
             throw new IllegalStateException("Error in MimeType logic", e);
         }
 
-        types.put(rootMimeType.getName(), rootMimeType);
-        types.put(textMimeType.getName(), textMimeType);
-        types.put(xmlMimeType.getName(), xmlMimeType);
+        types.put(rootMimeType.getType(), rootMimeType);
+        types.put(textMimeType.getType(), textMimeType);
+        types.put(xmlMimeType.getType(), xmlMimeType);
 
         try {
             xmlRootExtractor = new XmlRootExtractor();
@@ -397,21 +398,21 @@ public final class MimeTypes implements 
      */
     public synchronized MimeType forName(String name)
             throws MimeTypeException {
-        if (MimeType.isValid(name)) {
-            name = name.toLowerCase(Locale.ENGLISH);
-            MimeType type = types.get(name);
-            if (type == null) {
-                type = new MimeType(this, name);
-                if (name.startsWith("text/")) {
-                    type.setSuperType(textMimeType);
-                } else if (name.endsWith("+xml")) {
-                	type.setSuperType(xmlMimeType);
+        MediaType type = MediaType.parse(name);
+        if (type != null) {
+            MimeType mime = types.get(type);
+            if (mime == null) {
+                mime = new MimeType(this, type);
+                if ("text".equals(type.getType())) {
+                    mime.setSuperType(textMimeType);
+                } else if (type.getSubtype().endsWith("+xml")) {
+                    mime.setSuperType(xmlMimeType);
                 } else {
-                    type.setSuperType(rootMimeType);
+                    mime.setSuperType(rootMimeType);
                 }
-                types.put(name, type);
+                types.put(type, mime);
             }
-            return type;
+            return mime;
         } else {
             throw new MimeTypeException("Invalid media type name: " + name);
         }
@@ -425,7 +426,7 @@ public final class MimeTypes implements 
      * @param alias media type alias (normalized to lower case)
      * @throws MimeTypeException if the alias already exists
      */
-    synchronized void addAlias(MimeType type, String alias)
+    synchronized void addAlias(MimeType type, MediaType alias)
             throws MimeTypeException {
         if (!types.containsKey(alias)) {
             types.put(alias, type);

Modified: lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MimeTypesReader.java
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MimeTypesReader.java?rev=938976&r1=938975&r2=938976&view=diff
==============================================================================
--- lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MimeTypesReader.java (original)
+++ lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MimeTypesReader.java Wed Apr 28 13:05:14 2010
@@ -156,7 +156,13 @@ final class MimeTypesReader implements M
                     readMagic(nodeElement, type);
                 } else if (nodeElement.getTagName().equals(ALIAS_TAG)) {
                     String alias = nodeElement.getAttribute(ALIAS_TYPE_ATTR);
-                    type.addAlias(alias);
+                    MediaType aliasType = MediaType.parse(alias);
+                    if (aliasType != null) {
+                        type.addAlias(aliasType);
+                    } else {
+                        throw new MimeTypeException(
+                                "Invalid media type alias: " + alias);
+                    }
                 } else if (nodeElement.getTagName().equals(ROOT_XML_TAG)) {
                     readRootXML(nodeElement, type);
                 } else if (nodeElement.getTagName().equals(SUB_CLASS_OF_TAG)) {

Modified: lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/utils/ParseUtils.java
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/utils/ParseUtils.java?rev=938976&r1=938975&r2=938976&view=diff
==============================================================================
--- lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/utils/ParseUtils.java (original)
+++ lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/utils/ParseUtils.java Wed Apr 28 13:05:14 2010
@@ -28,6 +28,7 @@ import org.apache.tika.config.TikaConfig
 import org.apache.tika.exception.TikaException;
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.metadata.TikaMimeKeys;
+import org.apache.tika.mime.MediaType;
 import org.apache.tika.parser.Parser;
 import org.apache.tika.sax.BodyContentHandler;
 import org.xml.sax.ContentHandler;
@@ -52,7 +53,7 @@ public class ParseUtils implements TikaM
      */
     public static Parser getParser(String mimeType, TikaConfig config)
             throws TikaException {
-        return config.getParser(mimeType);
+        return config.getParser(MediaType.parse(mimeType));
     }
 
     /**
@@ -106,7 +107,7 @@ public class ParseUtils implements TikaM
             InputStream stream, TikaConfig config, String mimeType)
             throws TikaException, IOException {
         try {
-            Parser parser = config.getParser(mimeType);
+            Parser parser = config.getParser(MediaType.parse(mimeType));
             ContentHandler handler = new BodyContentHandler();
             parser.parse(stream, handler, new Metadata());
             return handler.toString();

Modified: lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/TestParsers.java
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/TestParsers.java?rev=938976&r1=938975&r2=938976&view=diff
==============================================================================
--- lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/TestParsers.java (original)
+++ lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/TestParsers.java Wed Apr 28 13:05:14 2010
@@ -26,6 +26,7 @@ import junit.framework.TestCase;
 
 import org.apache.tika.config.TikaConfig;
 import org.apache.tika.metadata.Metadata;
+import org.apache.tika.mime.MediaType;
 import org.apache.tika.parser.Parser;
 import org.apache.tika.utils.ParseUtils;
 import org.xml.sax.helpers.DefaultHandler;
@@ -78,7 +79,8 @@ public class TestParsers extends TestCas
         String s2 = ParseUtils.getStringContent(file, tc,
                 "application/vnd.ms-powerpoint");
         assertEquals(s1, s2);
-        Parser parser = tc.getParser("application/vnd.ms-powerpoint");
+        Parser parser =
+            tc.getParser(MediaType.parse("application/vnd.ms-powerpoint"));
         Metadata metadata = new Metadata();
         InputStream stream = new FileInputStream(file);
         try {
@@ -94,7 +96,7 @@ public class TestParsers extends TestCas
         String s1 = ParseUtils.getStringContent(file, tc);
         String s2 = ParseUtils.getStringContent(file, tc, "application/msword");
         assertEquals(s1, s2);
-        Parser parser = tc.getParser("application/msword");
+        Parser parser = tc.getParser(MediaType.parse("application/msword"));
         Metadata metadata = new Metadata();
         InputStream stream = new FileInputStream(file);
         try {
@@ -114,7 +116,8 @@ public class TestParsers extends TestCas
         assertEquals(s1, s2);
         assertTrue("Text does not contain '" + expected + "'", s1
                 .contains(expected));
-        Parser parser = tc.getParser("application/vnd.ms-excel");
+        Parser parser =
+            tc.getParser(MediaType.parse("application/vnd.ms-excel"));
         Metadata metadata = new Metadata();
         InputStream stream = new FileInputStream(file);
         try {
@@ -147,7 +150,7 @@ public class TestParsers extends TestCas
         String s2 = ParseUtils.getStringContent(file, tc, "text/html");
         assertEquals(s1, s2);
 
-        Parser parser = tc.getParser("text/html");
+        Parser parser = tc.getParser(MediaType.parse("text/html"));
         assertNotNull(parser);
     }
 
@@ -157,7 +160,7 @@ public class TestParsers extends TestCas
         String s2 = ParseUtils.getStringContent(file, tc, "application/zip");
         assertEquals(s1, s2);
 
-        Parser parser = tc.getParser("application/zip");
+        Parser parser = tc.getParser(MediaType.parse("application/zip"));
         assertNotNull(parser);
     }
 
@@ -167,7 +170,7 @@ public class TestParsers extends TestCas
         String s2 = ParseUtils.getStringContent(file, tc, "audio/mpeg");
         assertEquals(s1, s2);
 
-        Parser parser = tc.getParser("audio/mpeg");
+        Parser parser = tc.getParser(MediaType.parse("audio/mpeg"));
         assertNotNull(parser);
     }
 

Modified: lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/MimeTypeTest.java
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/MimeTypeTest.java?rev=938976&r1=938975&r2=938976&view=diff
==============================================================================
--- lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/MimeTypeTest.java (original)
+++ lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/MimeTypeTest.java Wed Apr 28 13:05:14 2010
@@ -33,7 +33,7 @@ public class MimeTypeTest extends TestCa
 
         // Missing registry
         try {
-            new MimeType(null, "text/plain");
+            new MimeType(null, MediaType.TEXT_PLAIN);
             fail("Expected IllegalArgumentException");
         } catch (IllegalArgumentException e) {
             // expected result
@@ -46,22 +46,6 @@ public class MimeTypeTest extends TestCa
         } catch (IllegalArgumentException e) {
             // expected result
         }
-
-        // Invalid name (no slash)
-        try {
-            new MimeType(types, "application");
-            fail("Expected IllegalArgumentException");
-        } catch (IllegalArgumentException e) {
-            // expected result
-        }
-
-        // Invalid name (not lower case)
-        try {
-            new MimeType(types, "TEXT/PLAIN");
-            fail("Expected IllegalArgumentException");
-        } catch (IllegalArgumentException e) {
-            // expected result
-        }
     }
 
     public void testIsValidName() {

Modified: lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/MimeTypesTest.java
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/MimeTypesTest.java?rev=938976&r1=938975&r2=938976&view=diff
==============================================================================
--- lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/MimeTypesTest.java (original)
+++ lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/MimeTypesTest.java Wed Apr 28 13:05:14 2010
@@ -35,7 +35,7 @@ public class MimeTypesTest extends TestC
         types = new MimeTypes();
         binary = types.forName("application/octet-stream");
         text = types.forName("text/plain");
-        text.addAlias("text/x-plain");
+        text.addAlias(MediaType.parse("text/x-plain"));
         html = types.forName("text/html");
         html.setSuperType(text);
     }
@@ -52,16 +52,6 @@ public class MimeTypesTest extends TestC
         }
     }
 
-    public void testAddAlias() throws MimeTypeException {
-        assertEquals(text, types.forName("text/x-plain"));
-        try {
-            text.addAlias("invalid");
-            fail("MimeTypeException not thrown on invalid alias name");
-        } catch (MimeTypeException e) {
-            // expected
-        }
-    }
-
     public void testSuperType() throws MimeTypeException {
         assertNull(binary.getSuperType());
         assertEquals(binary, text.getSuperType());

Modified: lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/TestMimeTypes.java
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/TestMimeTypes.java?rev=938976&r1=938975&r2=938976&view=diff
==============================================================================
--- lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/TestMimeTypes.java (original)
+++ lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/TestMimeTypes.java Wed Apr 28 13:05:14 2010
@@ -250,16 +250,16 @@ public class TestMimeTypes extends TestC
      * @since TIKA-194
      */
     public void testJavaRegex() throws Exception{
-        MimeType testType = new MimeType(this.repo, "foo/bar");
+        MimeType testType = new MimeType(this.repo, MediaType.parse("foo/bar"));
         this.repo.add(testType);
         assertNotNull(repo.forName("foo/bar"));
         String pattern = "rtg_sst_grb_0\\.5\\.\\d{8}";
         this.repo.addPattern(testType, pattern, true);
         String testFileName = "rtg_sst_grb_0.5.12345678";
         assertNotNull(this.repo.getMimeType(testFileName));
-        assertEquals(this.repo.getMimeType(testFileName).getName(), "foo/bar");    
+        assertEquals(this.repo.getMimeType(testFileName).getName(), "foo/bar");
         
-        MimeType testType2 = new MimeType(this.repo, "foo/bar2");
+        MimeType testType2 = new MimeType(this.repo, MediaType.parse("foo/bar2"));
         this.repo.add(testType2);
         assertNotNull(repo.forName("foo/bar2"));
         this.repo.addPattern(testType2, pattern, false);



Re: svn commit: r938976 - in /lucene/tika/trunk: tika-core/src/main/java/org/apache/tika/config/ tika-core/src/main/java/org/apache/tika/mime/ tika-core/src/main/java/org/apache/tika/utils/ tika-parsers/src/test/java/org/apache/tika/ tika-parsers/src/test/java/org/apache/tika/mime/

Posted by "Mattmann, Chris A (388J)" <ch...@jpl.nasa.gov>.
+1, I've found it confusing and I like MediaType better!


On 4/28/10 6:05 AM, "jukka@apache.org" <ju...@apache.org> wrote:

Author: jukka
Date: Wed Apr 28 13:05:14 2010
New Revision: 938976

URL: http://svn.apache.org/viewvc?rev=938976&view=rev
Log:
TIKA-89: Rename MimeType and MimeTypes

Make MimeType use MediaType internally. The plan is to ultimately get rid of the MimeType class.

Modified:
    lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java
    lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MediaType.java
    lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MimeType.java
    lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MimeTypes.java
    lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MimeTypesReader.java
    lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/utils/ParseUtils.java
    lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/TestParsers.java
    lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/MimeTypeTest.java
    lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/MimeTypesTest.java
    lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/TestMimeTypes.java

Modified: lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java?rev=938976&r1=938975&r2=938976&view=diff
==============================================================================
--- lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java (original)
+++ lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java Wed Apr 28 13:05:14 2010
@@ -191,7 +191,7 @@ public class TikaConfig {
      * @param mimeType MIME type
      * @return configured Parser instance, or <code>null</code>
      */
-    public Parser getParser(String mimeType) {
+    public Parser getParser(MediaType mimeType) {
         return parsers.get(mimeType);
     }


Modified: lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MediaType.java
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MediaType.java?rev=938976&r1=938975&r2=938976&view=diff
==============================================================================
--- lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MediaType.java (original)
+++ lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MediaType.java Wed Apr 28 13:05:14 2010
@@ -1,4 +1,4 @@
-/**
+/*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
@@ -28,7 +28,7 @@ import java.util.regex.Pattern;
 /**
  * Internet media type.
  */
-public final class MediaType {
+public final class MediaType implements Comparable<MediaType> {

     private static final Map<String, String> NO_PARAMETERS =
         new TreeMap<String, String>();
@@ -254,4 +254,8 @@ public final class MediaType {
         return hash;
     }

+    public int compareTo(MediaType that) {
+        return toString().compareTo(that.toString());
+    }
+
 }

Modified: lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MimeType.java
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MimeType.java?rev=938976&r1=938975&r2=938976&view=diff
==============================================================================
--- lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MimeType.java (original)
+++ lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MimeType.java Wed Apr 28 13:05:14 2010
@@ -1,4 +1,4 @@
-/**
+/*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
@@ -67,9 +67,9 @@ public final class MimeType implements C
     private final MimeTypes registry;

     /**
-     * Lower case name of this media type.
+     * The normalized media type name.
      */
-    private final String name;
+    private final MediaType type;

     /**
      * Description of this media type.
@@ -99,17 +99,26 @@ public final class MimeType implements C
      * up to date.
      *
      * @param registry the media type registry that contains this type
-     * @param name media type name
+     * @param type normalized media type name
      */
-    MimeType(MimeTypes registry, String name) {
+    MimeType(MimeTypes registry, MediaType type) {
         if (registry == null) {
             throw new IllegalArgumentException("Registry is missing");
         }
-        if (!MimeType.isValid(name) || !name.equals(name.toLowerCase())) {
-            throw new IllegalArgumentException("Media type name is invalid");
+        if (type == null) {
+            throw new IllegalArgumentException("Media type name is missing");
         }
         this.registry = registry;
-        this.name = name;
+        this.type = type;
+    }
+
+    /**
+     * Returns the normalized media type name.
+     *
+     * @return media type
+     */
+    public MediaType getType() {
+        return type;
     }

     /**
@@ -118,7 +127,7 @@ public final class MimeType implements C
      * @return media type name (lower case)
      */
     public String getName() {
-        return name;
+        return type.toString();
     }

     /**
@@ -197,14 +206,9 @@ public final class MimeType implements C
      * @throws MimeTypeException if the alias is invalid or
      *                           already registered for another media type
      */
-    public void addAlias(String alias) throws MimeTypeException {
-        if (isValid(alias)) {
-            alias = alias.toLowerCase();
-            if (!name.equals(alias)) {
-                registry.addAlias(this, alias);
-            }
-        } else {
-            throw new MimeTypeException("Invalid media type alias: " + alias);
+    public void addAlias(MediaType alias) throws MimeTypeException {
+        if (!alias.isSpecializationOf(type)) {
+            registry.addAlias(this, alias);
         }
     }

@@ -347,22 +351,21 @@ public final class MimeType implements C

     //----------------------------------------------------------< Comparable >

-    public int compareTo(MimeType type) {
-        if (type == null) {
+    public int compareTo(MimeType mime) {
+        if (mime == null) {
             throw new IllegalArgumentException("MimeType is missing");
-        }
-        if (type == this) {
+        } else if (mime == this) {
             return 0;
-        } else if (this.isDescendantOf(type)) {
+        } else if (this.isDescendantOf(mime)) {
             return 1;
-        } else if (type.isDescendantOf(this)) {
+        } else if (mime.isDescendantOf(this)) {
             return -1;
         } else if (superType != null) {
-            return superType.compareTo(type);
-        } else if (type.superType != null) {
-            return compareTo(type.superType);
+            return superType.compareTo(mime);
+        } else if (mime.superType != null) {
+            return compareTo(mime.superType);
         } else {
-            return name.compareTo(type.name);
+            return type.compareTo(mime.type);
         }
     }

@@ -374,7 +377,7 @@ public final class MimeType implements C
      * @return media type name
      */
     public String toString() {
-        return name;
+        return type.toString();
     }

 }

Modified: lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MimeTypes.java
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MimeTypes.java?rev=938976&r1=938975&r2=938976&view=diff
==============================================================================
--- lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MimeTypes.java (original)
+++ lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MimeTypes.java Wed Apr 28 13:05:14 2010
@@ -119,7 +119,8 @@ public final class MimeTypes implements
     private final MimeType xmlMimeType;

     /** All the registered MimeTypes indexed on their name */
-    private final Map<String, MimeType> types = new HashMap<String, MimeType>();
+    private final Map<MediaType, MimeType> types =
+        new HashMap<MediaType, MimeType>();

     /** The patterns matcher */
     private Patterns patterns = new Patterns();
@@ -133,9 +134,9 @@ public final class MimeTypes implements
     private final XmlRootExtractor xmlRootExtractor;

     public MimeTypes() {
-        rootMimeType = new MimeType(this, OCTET_STREAM);
-        textMimeType = new MimeType(this, PLAIN_TEXT);
-        xmlMimeType = new MimeType(this, XML);
+        rootMimeType = new MimeType(this, MediaType.OCTET_STREAM);
+        textMimeType = new MimeType(this, MediaType.TEXT_PLAIN);
+        xmlMimeType = new MimeType(this, MediaType.APPLICATION_XML);

         try {
             textMimeType.setSuperType(rootMimeType);
@@ -144,9 +145,9 @@ public final class MimeTypes implements
             throw new IllegalStateException("Error in MimeType logic", e);
         }

-        types.put(rootMimeType.getName(), rootMimeType);
-        types.put(textMimeType.getName(), textMimeType);
-        types.put(xmlMimeType.getName(), xmlMimeType);
+        types.put(rootMimeType.getType(), rootMimeType);
+        types.put(textMimeType.getType(), textMimeType);
+        types.put(xmlMimeType.getType(), xmlMimeType);

         try {
             xmlRootExtractor = new XmlRootExtractor();
@@ -397,21 +398,21 @@ public final class MimeTypes implements
      */
     public synchronized MimeType forName(String name)
             throws MimeTypeException {
-        if (MimeType.isValid(name)) {
-            name = name.toLowerCase(Locale.ENGLISH);
-            MimeType type = types.get(name);
-            if (type == null) {
-                type = new MimeType(this, name);
-                if (name.startsWith("text/")) {
-                    type.setSuperType(textMimeType);
-                } else if (name.endsWith("+xml")) {
-                       type.setSuperType(xmlMimeType);
+        MediaType type = MediaType.parse(name);
+        if (type != null) {
+            MimeType mime = types.get(type);
+            if (mime == null) {
+                mime = new MimeType(this, type);
+                if ("text".equals(type.getType())) {
+                    mime.setSuperType(textMimeType);
+                } else if (type.getSubtype().endsWith("+xml")) {
+                    mime.setSuperType(xmlMimeType);
                 } else {
-                    type.setSuperType(rootMimeType);
+                    mime.setSuperType(rootMimeType);
                 }
-                types.put(name, type);
+                types.put(type, mime);
             }
-            return type;
+            return mime;
         } else {
             throw new MimeTypeException("Invalid media type name: " + name);
         }
@@ -425,7 +426,7 @@ public final class MimeTypes implements
      * @param alias media type alias (normalized to lower case)
      * @throws MimeTypeException if the alias already exists
      */
-    synchronized void addAlias(MimeType type, String alias)
+    synchronized void addAlias(MimeType type, MediaType alias)
             throws MimeTypeException {
         if (!types.containsKey(alias)) {
             types.put(alias, type);

Modified: lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MimeTypesReader.java
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MimeTypesReader.java?rev=938976&r1=938975&r2=938976&view=diff
==============================================================================
--- lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MimeTypesReader.java (original)
+++ lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MimeTypesReader.java Wed Apr 28 13:05:14 2010
@@ -156,7 +156,13 @@ final class MimeTypesReader implements M
                     readMagic(nodeElement, type);
                 } else if (nodeElement.getTagName().equals(ALIAS_TAG)) {
                     String alias = nodeElement.getAttribute(ALIAS_TYPE_ATTR);
-                    type.addAlias(alias);
+                    MediaType aliasType = MediaType.parse(alias);
+                    if (aliasType != null) {
+                        type.addAlias(aliasType);
+                    } else {
+                        throw new MimeTypeException(
+                                "Invalid media type alias: " + alias);
+                    }
                 } else if (nodeElement.getTagName().equals(ROOT_XML_TAG)) {
                     readRootXML(nodeElement, type);
                 } else if (nodeElement.getTagName().equals(SUB_CLASS_OF_TAG)) {

Modified: lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/utils/ParseUtils.java
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/utils/ParseUtils.java?rev=938976&r1=938975&r2=938976&view=diff
==============================================================================
--- lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/utils/ParseUtils.java (original)
+++ lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/utils/ParseUtils.java Wed Apr 28 13:05:14 2010
@@ -28,6 +28,7 @@ import org.apache.tika.config.TikaConfig
 import org.apache.tika.exception.TikaException;
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.metadata.TikaMimeKeys;
+import org.apache.tika.mime.MediaType;
 import org.apache.tika.parser.Parser;
 import org.apache.tika.sax.BodyContentHandler;
 import org.xml.sax.ContentHandler;
@@ -52,7 +53,7 @@ public class ParseUtils implements TikaM
      */
     public static Parser getParser(String mimeType, TikaConfig config)
             throws TikaException {
-        return config.getParser(mimeType);
+        return config.getParser(MediaType.parse(mimeType));
     }

     /**
@@ -106,7 +107,7 @@ public class ParseUtils implements TikaM
             InputStream stream, TikaConfig config, String mimeType)
             throws TikaException, IOException {
         try {
-            Parser parser = config.getParser(mimeType);
+            Parser parser = config.getParser(MediaType.parse(mimeType));
             ContentHandler handler = new BodyContentHandler();
             parser.parse(stream, handler, new Metadata());
             return handler.toString();

Modified: lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/TestParsers.java
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/TestParsers.java?rev=938976&r1=938975&r2=938976&view=diff
==============================================================================
--- lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/TestParsers.java (original)
+++ lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/TestParsers.java Wed Apr 28 13:05:14 2010
@@ -26,6 +26,7 @@ import junit.framework.TestCase;

 import org.apache.tika.config.TikaConfig;
 import org.apache.tika.metadata.Metadata;
+import org.apache.tika.mime.MediaType;
 import org.apache.tika.parser.Parser;
 import org.apache.tika.utils.ParseUtils;
 import org.xml.sax.helpers.DefaultHandler;
@@ -78,7 +79,8 @@ public class TestParsers extends TestCas
         String s2 = ParseUtils.getStringContent(file, tc,
                 "application/vnd.ms-powerpoint");
         assertEquals(s1, s2);
-        Parser parser = tc.getParser("application/vnd.ms-powerpoint");
+        Parser parser =
+            tc.getParser(MediaType.parse("application/vnd.ms-powerpoint"));
         Metadata metadata = new Metadata();
         InputStream stream = new FileInputStream(file);
         try {
@@ -94,7 +96,7 @@ public class TestParsers extends TestCas
         String s1 = ParseUtils.getStringContent(file, tc);
         String s2 = ParseUtils.getStringContent(file, tc, "application/msword");
         assertEquals(s1, s2);
-        Parser parser = tc.getParser("application/msword");
+        Parser parser = tc.getParser(MediaType.parse("application/msword"));
         Metadata metadata = new Metadata();
         InputStream stream = new FileInputStream(file);
         try {
@@ -114,7 +116,8 @@ public class TestParsers extends TestCas
         assertEquals(s1, s2);
         assertTrue("Text does not contain '" + expected + "'", s1
                 .contains(expected));
-        Parser parser = tc.getParser("application/vnd.ms-excel");
+        Parser parser =
+            tc.getParser(MediaType.parse("application/vnd.ms-excel"));
         Metadata metadata = new Metadata();
         InputStream stream = new FileInputStream(file);
         try {
@@ -147,7 +150,7 @@ public class TestParsers extends TestCas
         String s2 = ParseUtils.getStringContent(file, tc, "text/html");
         assertEquals(s1, s2);

-        Parser parser = tc.getParser("text/html");
+        Parser parser = tc.getParser(MediaType.parse("text/html"));
         assertNotNull(parser);
     }

@@ -157,7 +160,7 @@ public class TestParsers extends TestCas
         String s2 = ParseUtils.getStringContent(file, tc, "application/zip");
         assertEquals(s1, s2);

-        Parser parser = tc.getParser("application/zip");
+        Parser parser = tc.getParser(MediaType.parse("application/zip"));
         assertNotNull(parser);
     }

@@ -167,7 +170,7 @@ public class TestParsers extends TestCas
         String s2 = ParseUtils.getStringContent(file, tc, "audio/mpeg");
         assertEquals(s1, s2);

-        Parser parser = tc.getParser("audio/mpeg");
+        Parser parser = tc.getParser(MediaType.parse("audio/mpeg"));
         assertNotNull(parser);
     }


Modified: lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/MimeTypeTest.java
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/MimeTypeTest.java?rev=938976&r1=938975&r2=938976&view=diff
==============================================================================
--- lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/MimeTypeTest.java (original)
+++ lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/MimeTypeTest.java Wed Apr 28 13:05:14 2010
@@ -33,7 +33,7 @@ public class MimeTypeTest extends TestCa

         // Missing registry
         try {
-            new MimeType(null, "text/plain");
+            new MimeType(null, MediaType.TEXT_PLAIN);
             fail("Expected IllegalArgumentException");
         } catch (IllegalArgumentException e) {
             // expected result
@@ -46,22 +46,6 @@ public class MimeTypeTest extends TestCa
         } catch (IllegalArgumentException e) {
             // expected result
         }
-
-        // Invalid name (no slash)
-        try {
-            new MimeType(types, "application");
-            fail("Expected IllegalArgumentException");
-        } catch (IllegalArgumentException e) {
-            // expected result
-        }
-
-        // Invalid name (not lower case)
-        try {
-            new MimeType(types, "TEXT/PLAIN");
-            fail("Expected IllegalArgumentException");
-        } catch (IllegalArgumentException e) {
-            // expected result
-        }
     }

     public void testIsValidName() {

Modified: lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/MimeTypesTest.java
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/MimeTypesTest.java?rev=938976&r1=938975&r2=938976&view=diff
==============================================================================
--- lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/MimeTypesTest.java (original)
+++ lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/MimeTypesTest.java Wed Apr 28 13:05:14 2010
@@ -35,7 +35,7 @@ public class MimeTypesTest extends TestC
         types = new MimeTypes();
         binary = types.forName("application/octet-stream");
         text = types.forName("text/plain");
-        text.addAlias("text/x-plain");
+        text.addAlias(MediaType.parse("text/x-plain"));
         html = types.forName("text/html");
         html.setSuperType(text);
     }
@@ -52,16 +52,6 @@ public class MimeTypesTest extends TestC
         }
     }

-    public void testAddAlias() throws MimeTypeException {
-        assertEquals(text, types.forName("text/x-plain"));
-        try {
-            text.addAlias("invalid");
-            fail("MimeTypeException not thrown on invalid alias name");
-        } catch (MimeTypeException e) {
-            // expected
-        }
-    }
-
     public void testSuperType() throws MimeTypeException {
         assertNull(binary.getSuperType());
         assertEquals(binary, text.getSuperType());

Modified: lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/TestMimeTypes.java
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/TestMimeTypes.java?rev=938976&r1=938975&r2=938976&view=diff
==============================================================================
--- lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/TestMimeTypes.java (original)
+++ lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/TestMimeTypes.java Wed Apr 28 13:05:14 2010
@@ -250,16 +250,16 @@ public class TestMimeTypes extends TestC
      * @since TIKA-194
      */
     public void testJavaRegex() throws Exception{
-        MimeType testType = new MimeType(this.repo, "foo/bar");
+        MimeType testType = new MimeType(this.repo, MediaType.parse("foo/bar"));
         this.repo.add(testType);
         assertNotNull(repo.forName("foo/bar"));
         String pattern = "rtg_sst_grb_0\\.5\\.\\d{8}";
         this.repo.addPattern(testType, pattern, true);
         String testFileName = "rtg_sst_grb_0.5.12345678";
         assertNotNull(this.repo.getMimeType(testFileName));
-        assertEquals(this.repo.getMimeType(testFileName).getName(), "foo/bar");
+        assertEquals(this.repo.getMimeType(testFileName).getName(), "foo/bar");

-        MimeType testType2 = new MimeType(this.repo, "foo/bar2");
+        MimeType testType2 = new MimeType(this.repo, MediaType.parse("foo/bar2"));
         this.repo.add(testType2);
         assertNotNull(repo.forName("foo/bar2"));
         this.repo.addPattern(testType2, pattern, false);





++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Chris Mattmann, Ph.D.
Senior Computer Scientist
NASA Jet Propulsion Laboratory Pasadena, CA 91109 USA
Office: 171-266B, Mailstop: 171-246
Email: Chris.Mattmann@jpl.nasa.gov
WWW:   http://sunset.usc.edu/~mattmann/
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Adjunct Assistant Professor, Computer Science Department
University of Southern California, Los Angeles, CA 90089 USA
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++