You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ju...@apache.org on 2010/06/18 14:03:09 UTC

svn commit: r955959 - in /tika/trunk: tika-core/src/main/java/org/apache/tika/config/ tika-core/src/main/java/org/apache/tika/detect/ tika-core/src/main/java/org/apache/tika/mime/ tika-core/src/main/java/org/apache/tika/parser/ tika-core/src/test/java/...

Author: jukka
Date: Fri Jun 18 12:03:08 2010
New Revision: 955959

URL: http://svn.apache.org/viewvc?rev=955959&view=rev
Log:
TIKA-308: Improve supertype handling in type registry

Move all supertype information into MediaTypeRegistry and leverage it in CompositeParser and elsewhere

Modified:
    tika/trunk/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java
    tika/trunk/tika-core/src/main/java/org/apache/tika/detect/CompositeDetector.java
    tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MediaType.java
    tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MediaTypeRegistry.java
    tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MimeType.java
    tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MimeTypes.java
    tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MimeTypesReader.java
    tika/trunk/tika-core/src/main/java/org/apache/tika/mime/Patterns.java
    tika/trunk/tika-core/src/main/java/org/apache/tika/parser/AutoDetectParser.java
    tika/trunk/tika-core/src/main/java/org/apache/tika/parser/CompositeParser.java
    tika/trunk/tika-core/src/test/java/org/apache/tika/mime/MimeDetectionTest.java
    tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/MimeTypeTest.java
    tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/MimeTypesTest.java
    tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/PatternsTest.java
    tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/TestMimeTypes.java

Modified: tika/trunk/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java?rev=955959&r1=955958&r2=955959&view=diff
==============================================================================
--- tika/trunk/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java (original)
+++ tika/trunk/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java Fri Jun 18 12:03:08 2010
@@ -31,6 +31,7 @@ import javax.xml.parsers.ParserConfigura
 
 import org.apache.tika.exception.TikaException;
 import org.apache.tika.mime.MediaType;
+import org.apache.tika.mime.MediaTypeRegistry;
 import org.apache.tika.mime.MimeTypeException;
 import org.apache.tika.mime.MimeTypes;
 import org.apache.tika.mime.MimeTypesFactory;
@@ -251,6 +252,10 @@ public class TikaConfig {
         return mimeTypes;
     }
 
+    public MediaTypeRegistry getMediaTypeRegistry() {
+        return mimeTypes.getMediaTypeRegistry();
+    }
+
     /**
      * Provides a default configuration (TikaConfig).  Currently creates a
      * new instance each time it's called; we may be able to have it

Modified: tika/trunk/tika-core/src/main/java/org/apache/tika/detect/CompositeDetector.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/detect/CompositeDetector.java?rev=955959&r1=955958&r2=955959&view=diff
==============================================================================
--- tika/trunk/tika-core/src/main/java/org/apache/tika/detect/CompositeDetector.java (original)
+++ tika/trunk/tika-core/src/main/java/org/apache/tika/detect/CompositeDetector.java Fri Jun 18 12:03:08 2010
@@ -23,20 +23,34 @@ import java.util.List;
 
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.mime.MediaType;
+import org.apache.tika.mime.MediaTypeRegistry;
 
 /**
  * Content type detector that combines multiple different detection mechanisms.
  */
 public class CompositeDetector implements Detector {
 
+    /**
+     * Serial version UID
+     */
+    private static final long serialVersionUID = 5980683158436430252L;
+
+    private final MediaTypeRegistry registry;
+
     private final List<Detector> detectors;
 
-    public CompositeDetector(List<Detector> detectors) {
+    public CompositeDetector(
+            MediaTypeRegistry registry, List<Detector> detectors) {
+        this.registry = registry;
         this.detectors = detectors;
     }
 
+    public CompositeDetector(List<Detector> detectors) {
+        this(new MediaTypeRegistry(), detectors);
+    }
+
     public CompositeDetector(Detector... detectors) {
-        this.detectors = Arrays.asList(detectors);
+        this(Arrays.asList(detectors));
     }
 
     public MediaType detect(InputStream input, Metadata metadata)
@@ -44,7 +58,7 @@ public class CompositeDetector implement
         MediaType type = MediaType.OCTET_STREAM;
         for (Detector detector : detectors) {
             MediaType detected = detector.detect(input, metadata);
-            if (detected.isSpecializationOf(type)) {
+            if (registry.isSpecializationOf(detected, type)) {
                 type = detected;
             }
         }

Modified: tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MediaType.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MediaType.java?rev=955959&r1=955958&r2=955959&view=diff
==============================================================================
--- tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MediaType.java (original)
+++ tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MediaType.java Fri Jun 18 12:03:08 2010
@@ -205,23 +205,6 @@ public final class MediaType implements 
         return Collections.unmodifiableMap(parameters);
     }
 
-    public boolean isSpecializationOf(MediaType that) {
-        if (OCTET_STREAM.equals(that)) {
-            return true;
-        } else if (!type.equals(that.type)) {
-            return false;
-        } else if (!parameters.entrySet().containsAll(that.parameters.entrySet())) {
-            return false;
-        } else if (TEXT_PLAIN.equals(that.getBaseType())) {
-            return true;
-        } else if (APPLICATION_XML.equals(that.getBaseType())
-                && subtype.endsWith("+xml")) {
-            return true;
-        } else {
-            return subtype.equals(that.subtype);
-        }
-    }
-
     public String toString() {
         StringBuilder builder = new StringBuilder();
         builder.append(type);

Modified: tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MediaTypeRegistry.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MediaTypeRegistry.java?rev=955959&r1=955958&r2=955959&view=diff
==============================================================================
--- tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MediaTypeRegistry.java (original)
+++ tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MediaTypeRegistry.java Fri Jun 18 12:03:08 2010
@@ -18,6 +18,8 @@ package org.apache.tika.mime;
 
 import java.util.HashMap;
 import java.util.Map;
+import java.util.SortedSet;
+import java.util.TreeSet;
 
 /**
  * Registry of known Internet media types.
@@ -25,8 +27,7 @@ import java.util.Map;
 public class MediaTypeRegistry {
 
     /**
-     * Registry of known media types, including type aliases. All the types
-     * in this map are base types, i.e. they have no parameters. A canonical
+     * Registry of known media types, including type aliases. A canonical
      * media type is handled as an identity mapping, while an alias is stored
      * as a mapping from the alias to the corresponding canonical type.
      */
@@ -35,31 +36,49 @@ public class MediaTypeRegistry {
 
     /**
      * Known type inheritance relationships. The mapping is from a media type
-     * to the closest supertype. All types in this map are canonical and have
-     * no parameters.
+     * to the closest supertype.
      */
     private final Map<MediaType, MediaType> inheritance =
         new HashMap<MediaType, MediaType>();
 
-    public void addType(MediaType type) {
-        if (type == null || type.hasParameters()) {
-            throw new IllegalArgumentException();
-        } else if (registry.containsKey(type)) {
-            throw new IllegalStateException();
-        } else {
-            registry.put(type, type);
+    /**
+     * Returns the set of all known canonical media types. Type aliases are
+     * not included in the returned set.
+     *
+     * @since Apache Tika 0.8
+     * @return canonical media types
+     */
+    public SortedSet<MediaType> getTypes() {
+        return new TreeSet<MediaType>(registry.values());
+    }
+
+    /**
+     * Returns the set of known aliases of the given canonical media type.
+     *
+     * @since Apache Tika 0.8
+     * @param type canonical media type
+     * @return known aliases
+     */
+    public SortedSet<MediaType> getAliases(MediaType type) {
+        SortedSet<MediaType> aliases = new TreeSet<MediaType>();
+        for (Map.Entry<MediaType, MediaType> entry : registry.entrySet()) {
+            if (entry.getValue().equals(type) && !entry.getKey().equals(type)) {
+                aliases.add(entry.getKey());
+            }
         }
+        return aliases;
+    }
+
+    public void addType(MediaType type) {
+        registry.put(type, type);
     }
 
     public void addAlias(MediaType type, MediaType alias) {
-        if (type == null || alias == null
-                || type.hasParameters() || alias.hasParameters()) {
-            throw new IllegalArgumentException();
-        } else if (!registry.containsKey(type) || registry.containsKey(alias)) {
-            throw new IllegalStateException();
-        } else {
-            registry.put(alias, type);
-        }
+        registry.put(alias, type);
+    }
+
+    public void addSuperType(MediaType type, MediaType supertype) {
+        inheritance.put(type, supertype);
     }
 
     public MediaType normalize(MediaType type) {
@@ -74,6 +93,20 @@ public class MediaTypeRegistry {
     }
 
     /**
+     * Checks whether the given media type a is a specialization of a more
+     * generic type b.
+     *
+     * @param a media type
+     * @param b suspected supertype
+     * @return <code>true</code> if b is a supertype of a,
+     *         <code>false</code> otherwise
+     */
+    public boolean isSpecializationOf(MediaType a, MediaType b) {
+        MediaType x = getSuperType(a);
+        return x != null && (x.equals(b) || isSpecializationOf(x, b));
+    }
+
+    /**
      * Returns the supertype of the given type. If the given type has any
      * parameters, then the respective base type is returned. Otherwise
      * built-in heuristics like text/... -&gt; text/plain and

Modified: tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MimeType.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MimeType.java?rev=955959&r1=955958&r2=955959&view=diff
==============================================================================
--- tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MimeType.java (original)
+++ tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MimeType.java Fri Jun 18 12:03:08 2010
@@ -68,11 +68,6 @@ public final class MimeType implements C
     }
 
     /**
-     * The media type registry that contains this type.
-     */
-    private final MimeTypes registry;
-
-    /**
      * The normalized media type name.
      */
     private final MediaType type;
@@ -82,12 +77,6 @@ public final class MimeType implements C
      */
     private String description = "";
 
-    /**
-     * The parent type of this media type, or <code>null</code> if this
-     * is a top-level type.
-     */
-    private MimeType superType = null;
-
     /** The magics associated to this Mime-Type */
     private final ArrayList<Magic> magics = new ArrayList<Magic>();
 
@@ -104,17 +93,12 @@ public final class MimeType implements C
      * {@link MimeTypes#forName(String)} to keep the media type registry
      * up to date.
      *
-     * @param registry the media type registry that contains this type
      * @param type normalized media type name
      */
-    MimeType(MimeTypes registry, MediaType type) {
-        if (registry == null) {
-            throw new IllegalArgumentException("Registry is missing");
-        }
+    MimeType(MediaType type) {
         if (type == null) {
             throw new IllegalArgumentException("Media type name is missing");
         }
-        this.registry = registry;
         this.type = type;
     }
 
@@ -137,54 +121,6 @@ public final class MimeType implements C
     }
 
     /**
-     * Returns the parent of this media type.
-     *
-     * @return parent media type, or <code>null</code>
-     */
-    public MimeType getSuperType() {
-        return superType;
-    }
-
-    public void setSuperType(MimeType type) throws MimeTypeException {
-        if (type == null) {
-            throw new IllegalArgumentException("MimeType is missing");
-        }
-        if (type.registry != registry) {
-            throw new IllegalArgumentException("MimeType is from a different registry");
-        }
-        if (this.isDescendantOf(type)) {
-            // ignore, already a descendant of the given type
-        } else if (this == type) {
-            throw new MimeTypeException(
-                    "Media type can not inherit itself: " + type);
-        } else if (type.isDescendantOf(this)) {
-            throw new MimeTypeException(
-                    "Media type can not inherit its descendant: " + type);
-        } else if (superType == null) {
-            superType = type;
-        } else if (type.isDescendantOf(superType)) {
-            superType = type;
-        } else {
-            throw new MimeTypeException(
-                    "Conflicting media type inheritance: " + type);
-        }
-    }
-
-    public boolean isDescendantOf(MimeType type) {
-        if (type == null) {
-            throw new IllegalArgumentException("MimeType is missing");
-        }
-        synchronized (registry) {
-            for (MimeType t = superType; t != null; t = t.superType) {
-                if (t == type) {
-                    return true;
-                }
-            }
-            return false;
-        }
-    }
-
-    /**
      * Returns the description of this media type.
      *
      * @return media type description
@@ -206,19 +142,6 @@ public final class MimeType implements C
     }
 
     /**
-     * Adds an alias name for this media type.
-     *
-     * @param alias media type alias (case insensitive)
-     * @throws MimeTypeException if the alias is invalid or
-     *                           already registered for another media type
-     */
-    public void addAlias(MediaType alias) throws MimeTypeException {
-        if (!alias.isSpecializationOf(type)) {
-            registry.addAlias(this, alias);
-        }
-    }
-
-    /**
      * Add some rootXML info to this mime-type
      *
      * @param namespaceURI
@@ -363,21 +286,7 @@ public final class MimeType implements C
     //----------------------------------------------------------< Comparable >
 
     public int compareTo(MimeType mime) {
-        if (mime == null) {
-            throw new IllegalArgumentException("MimeType is missing");
-        } else if (mime == this) {
-            return 0;
-        } else if (this.isDescendantOf(mime)) {
-            return 1;
-        } else if (mime.isDescendantOf(this)) {
-            return -1;
-        } else if (superType != null) {
-            return superType.compareTo(mime);
-        } else if (mime.superType != null) {
-            return compareTo(mime.superType);
-        } else {
-            return type.compareTo(mime.type);
-        }
+        return type.compareTo(mime.type);
     }
 
     //--------------------------------------------------------------< Object >

Modified: tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MimeTypes.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MimeTypes.java?rev=955959&r1=955958&r2=955959&view=diff
==============================================================================
--- tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MimeTypes.java (original)
+++ tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MimeTypes.java Fri Jun 18 12:03:08 2010
@@ -1,4 +1,4 @@
-/**
+/*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
@@ -125,13 +125,18 @@ public final class MimeTypes implements 
      * xml type, application/xml
      */
     private final MimeType xmlMimeType;
-    
-    /** All the registered MimeTypes indexed on their name */
+
+    /**
+     * Registered media types and their aliases.
+     */
+    private final MediaTypeRegistry registry = new MediaTypeRegistry();
+
+    /** All the registered MimeTypes indexed on their canonical names */
     private final Map<MediaType, MimeType> types =
         new HashMap<MediaType, MimeType>();
 
     /** The patterns matcher */
-    private Patterns patterns = new Patterns();
+    private Patterns patterns = new Patterns(registry);
 
     /** List of all registered magics */
     private SortedSet<Magic> magics = new TreeSet<Magic>();
@@ -142,20 +147,13 @@ public final class MimeTypes implements 
     private transient XmlRootExtractor xmlRootExtractor = null;
 
     public MimeTypes() {
-        rootMimeType = new MimeType(this, MediaType.OCTET_STREAM);
-        textMimeType = new MimeType(this, MediaType.TEXT_PLAIN);
-        xmlMimeType = new MimeType(this, MediaType.APPLICATION_XML);
+        rootMimeType = new MimeType(MediaType.OCTET_STREAM);
+        textMimeType = new MimeType(MediaType.TEXT_PLAIN);
+        xmlMimeType = new MimeType(MediaType.APPLICATION_XML);
         
-        try {
-            textMimeType.setSuperType(rootMimeType);
-            xmlMimeType.setSuperType(rootMimeType);
-        } catch (MimeTypeException e) {
-            throw new IllegalStateException("Error in MimeType logic", e);
-        }
-
-        types.put(rootMimeType.getType(), rootMimeType);
-        types.put(textMimeType.getType(), textMimeType);
-        types.put(xmlMimeType.getType(), xmlMimeType);
+        add(rootMimeType);
+        add(textMimeType);
+        add(xmlMimeType);
     }
 
     /**
@@ -411,16 +409,10 @@ public final class MimeTypes implements 
             throws MimeTypeException {
         MediaType type = MediaType.parse(name);
         if (type != null) {
-            MimeType mime = types.get(type);
+            MimeType mime = types.get(registry.normalize(type));
             if (mime == null) {
-                mime = new MimeType(this, type);
-                if ("text".equals(type.getType())) {
-                    mime.setSuperType(textMimeType);
-                } else if (type.getSubtype().endsWith("+xml")) {
-                    mime.setSuperType(xmlMimeType);
-                } else {
-                    mime.setSuperType(rootMimeType);
-                }
+                mime = new MimeType(type);
+                add(mime);
                 types.put(type, mime);
             }
             return mime;
@@ -429,22 +421,19 @@ public final class MimeTypes implements 
         }
     }
 
+    public synchronized void setSuperType(MimeType type, MediaType parent) {
+        registry.addSuperType(type.getType(), parent);
+    }
+
     /**
      * Adds an alias for the given media type. This method should only
      * be called from {@link MimeType#addAlias(String)}.
      *
      * @param type media type
      * @param alias media type alias (normalized to lower case)
-     * @throws MimeTypeException if the alias already exists
      */
-    synchronized void addAlias(MimeType type, MediaType alias)
-            throws MimeTypeException {
-        if (!types.containsKey(alias)) {
-            types.put(alias, type);
-        } else {
-            throw new MimeTypeException(
-                    "Media type alias already exists: " + alias);
-        }
+    synchronized void addAlias(MimeType type, MediaType alias) {
+        registry.addAlias(type.getType(), alias);
     }
 
     /**
@@ -486,6 +475,10 @@ public final class MimeTypes implements 
         patterns.add(pattern, isRegex, type);
     }
 
+    public MediaTypeRegistry getMediaTypeRegistry() {
+        return registry;
+    }
+
     /**
      * Return the minimum length of data to provide to analyzing methods based
      * on the document's content in order to check all the known MimeTypes.
@@ -507,6 +500,9 @@ public final class MimeTypes implements 
      *            is the mime-type to add.
      */
     void add(MimeType type) {
+        registry.addType(type.getType());
+        types.put(type.getType(), type);
+
         // Update the magics index...
         if (type.hasMagic()) {
             magics.addAll(Arrays.asList(type.getMagics()));
@@ -533,14 +529,14 @@ public final class MimeTypes implements 
      */
     public MediaType detect(InputStream input, Metadata metadata)
             throws IOException {
-        MimeType type = rootMimeType;
+        MediaType type = MediaType.OCTET_STREAM;
 
         // Get type based on magic prefix
         if (input != null) {
             input.mark(getMinLength());
             try {
                 byte[] prefix = readMagicHeader(input);
-                type = getMimeType(prefix);
+                type = getMimeType(prefix).getType();
             } finally {
                 input.reset();
             }
@@ -566,8 +562,8 @@ public final class MimeTypes implements 
             }
 
             if (name != null) {
-                MimeType hint = getMimeType(name);
-                if (hint.isDescendantOf(type)) {
+                MediaType hint = getMimeType(name).getType();
+                if (registry.isSpecializationOf(hint, type)) {
                     type = hint;
                 }
             }
@@ -577,8 +573,8 @@ public final class MimeTypes implements 
         String typeName = metadata.get(Metadata.CONTENT_TYPE);
         if (typeName != null) {
             try {
-                MimeType hint = forName(typeName);
-                if (hint.isDescendantOf(type)) {
+                MediaType hint = forName(typeName).getType();
+                if (registry.isSpecializationOf(hint, type)) {
                     type = hint;
                 }
             } catch (MimeTypeException e) {
@@ -586,7 +582,7 @@ public final class MimeTypes implements 
             }
         }
 
-        return MediaType.parse(type.getName());
+        return type;
     }
 
 }

Modified: tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MimeTypesReader.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MimeTypesReader.java?rev=955959&r1=955958&r2=955959&view=diff
==============================================================================
--- tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MimeTypesReader.java (original)
+++ tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MimeTypesReader.java Fri Jun 18 12:03:08 2010
@@ -154,7 +154,7 @@ final class MimeTypesReader implements M
                     String alias = nodeElement.getAttribute(ALIAS_TYPE_ATTR);
                     MediaType aliasType = MediaType.parse(alias);
                     if (aliasType != null) {
-                        type.addAlias(aliasType);
+                        types.addAlias(type, aliasType);
                     } else {
                         throw new MimeTypeException(
                                 "Invalid media type alias: " + alias);
@@ -163,7 +163,7 @@ final class MimeTypesReader implements M
                     readRootXML(nodeElement, type);
                 } else if (nodeElement.getTagName().equals(SUB_CLASS_OF_TAG)) {
                     String parent = nodeElement.getAttribute(SUB_CLASS_TYPE_ATTR);
-                    type.setSuperType(types.forName(parent));
+                    types.setSuperType(type, MediaType.parse(parent));
                 }
             }
         }

Modified: tika/trunk/tika-core/src/main/java/org/apache/tika/mime/Patterns.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/mime/Patterns.java?rev=955959&r1=955958&r2=955959&view=diff
==============================================================================
--- tika/trunk/tika-core/src/main/java/org/apache/tika/mime/Patterns.java (original)
+++ tika/trunk/tika-core/src/main/java/org/apache/tika/mime/Patterns.java Fri Jun 18 12:03:08 2010
@@ -33,6 +33,8 @@ class Patterns implements Serializable {
      */
     private static final long serialVersionUID = -5778015347278111140L;
 
+    private final MediaTypeRegistry registry;
+
     /**
      * Index of exact name patterns.
      */
@@ -72,6 +74,10 @@ class Patterns implements Serializable {
 
     }
 
+    public Patterns(MediaTypeRegistry registry) {
+        this.registry = registry;
+    }
+
     public void add(String pattern, MimeType type) throws MimeTypeException {
         this.add(pattern, false, type);
     }
@@ -103,9 +109,11 @@ class Patterns implements Serializable {
     
     private void addName(String name, MimeType type) throws MimeTypeException {
         MimeType previous = names.get(name);
-        if (previous == null || previous.isDescendantOf(type)) {
+        if (previous == null
+                || registry.isSpecializationOf(previous.getType(), type.getType())) {
             names.put(name, type);
-        } else if (previous == type || type.isDescendantOf(previous)) {
+        } else if (previous == type
+                || registry.isSpecializationOf(type.getType(), previous.getType())) {
             // do nothing
         } else {
             throw new MimeTypeException("Conflicting name pattern: " + name);
@@ -115,12 +123,14 @@ class Patterns implements Serializable {
     private void addExtension(String extension, MimeType type)
             throws MimeTypeException {
         MimeType previous = extensions.get(extension);
-        if (previous == null || previous.isDescendantOf(type)) {
+        if (previous == null
+                || registry.isSpecializationOf(previous.getType(), type.getType())) {
             extensions.put(extension, type);
             int length = extension.length();
             minExtensionLength = Math.min(minExtensionLength, length);
             maxExtensionLength = Math.max(maxExtensionLength, length);
-        } else if (previous == type || type.isDescendantOf(previous)) {
+        } else if (previous == type
+                || registry.isSpecializationOf(type.getType(), previous.getType())) {
             // do nothing
         } else {
             throw new MimeTypeException(
@@ -131,9 +141,11 @@ class Patterns implements Serializable {
     private void addGlob(String glob, MimeType type)
             throws MimeTypeException {
         MimeType previous = globs.get(glob);
-        if (previous == null || previous.isDescendantOf(type)) {
+        if (previous == null
+                || registry.isSpecializationOf(previous.getType(), type.getType())) {
             globs.put(glob, type);
-        } else if (previous == type || type.isDescendantOf(previous)) {
+        } else if (previous == type
+                || registry.isSpecializationOf(type.getType(), previous.getType())) {
             // do nothing
         } else {
             throw new MimeTypeException("Conflicting glob pattern: " + glob);

Modified: tika/trunk/tika-core/src/main/java/org/apache/tika/parser/AutoDetectParser.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/AutoDetectParser.java?rev=955959&r1=955958&r2=955959&view=diff
==============================================================================
--- tika/trunk/tika-core/src/main/java/org/apache/tika/parser/AutoDetectParser.java (original)
+++ tika/trunk/tika-core/src/main/java/org/apache/tika/parser/AutoDetectParser.java Fri Jun 18 12:03:08 2010
@@ -33,6 +33,11 @@ import org.xml.sax.SAXException;
 public class AutoDetectParser extends CompositeParser {
 
     /**
+     * Serial version UID
+     */
+    private static final long serialVersionUID = 6110455808615143122L;
+
+    /**
      * The type detector used by this parser to auto-detect the type
      * of a document.
      */
@@ -47,7 +52,7 @@ public class AutoDetectParser extends Co
     }
 
     public AutoDetectParser(Detector detector) {
-        setParsers(TikaConfig.getDefaultConfig().getParsers());
+        this(TikaConfig.getDefaultConfig());
         setDetector(detector);
     }
 
@@ -58,6 +63,7 @@ public class AutoDetectParser extends Co
     public void setConfig(TikaConfig config) {
         setParsers(config.getParsers());
         setDetector(config.getMimeRepository());
+        setMediaTypeRegistry(config.getMediaTypeRegistry());
     }
 
     /**

Modified: tika/trunk/tika-core/src/main/java/org/apache/tika/parser/CompositeParser.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/CompositeParser.java?rev=955959&r1=955958&r2=955959&view=diff
==============================================================================
--- tika/trunk/tika-core/src/main/java/org/apache/tika/parser/CompositeParser.java (original)
+++ tika/trunk/tika-core/src/main/java/org/apache/tika/parser/CompositeParser.java Fri Jun 18 12:03:08 2010
@@ -1,4 +1,4 @@
-/**
+/*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
@@ -26,6 +26,7 @@ import org.apache.tika.exception.TikaExc
 import org.apache.tika.io.TaggedInputStream;
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.mime.MediaType;
+import org.apache.tika.mime.MediaTypeRegistry;
 import org.apache.tika.sax.TaggedContentHandler;
 import org.xml.sax.ContentHandler;
 import org.xml.sax.SAXException;
@@ -39,6 +40,16 @@ import org.xml.sax.SAXException;
 public class CompositeParser implements Parser {
 
     /**
+     * Serial version UID
+     */
+    private static final long serialVersionUID = 5613173903360405824L;
+
+    /**
+     * Media type registry.
+     */
+    private MediaTypeRegistry registry = new MediaTypeRegistry();
+
+    /**
      * Set of component parsers, keyed by the supported media types.
      */
     private Map<MediaType, Parser> parsers = new HashMap<MediaType, Parser>();
@@ -49,6 +60,26 @@ public class CompositeParser implements 
     private Parser fallback = new EmptyParser();
 
     /**
+     * Returns the media type registry used to infer type relationships.
+     *
+     * @since Apache Tika 0.8
+     * @return media type registry
+     */
+    public MediaTypeRegistry getMediaTypeRegistry() {
+        return registry;
+    }
+
+    /**
+     * Sets the media type registry used to infer type relationships.
+     *
+     * @since Apache Tika 0.8
+     * @param registry media type registry
+     */
+    public void setMediaTypeRegistry(MediaTypeRegistry registry) {
+        this.registry = registry;
+    }
+
+    /**
      * Returns the component parsers.
      *
      * @return component parsers, keyed by media type
@@ -110,7 +141,7 @@ public class CompositeParser implements 
             } else {
                 for (MediaType parserType : parsers.keySet()) {
                     if (parserType != null
-                            && type.isSpecializationOf(parserType)) {
+                            && registry.isSpecializationOf(type, parserType)) {
                         return parsers.get(parserType);
                     }
                 }

Modified: tika/trunk/tika-core/src/test/java/org/apache/tika/mime/MimeDetectionTest.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/test/java/org/apache/tika/mime/MimeDetectionTest.java?rev=955959&r1=955958&r2=955959&view=diff
==============================================================================
--- tika/trunk/tika-core/src/test/java/org/apache/tika/mime/MimeDetectionTest.java (original)
+++ tika/trunk/tika-core/src/test/java/org/apache/tika/mime/MimeDetectionTest.java Fri Jun 18 12:03:08 2010
@@ -29,12 +29,14 @@ public class MimeDetectionTest extends T
 
     private MimeTypes mimeTypes;
 
+    private MediaTypeRegistry registry;
+
     /** @inheritDoc */
     @Override
     protected void setUp() throws Exception {
         super.setUp();
         this.mimeTypes = TikaConfig.getDefaultConfig().getMimeRepository();
-        //this.mimeTypes = MimeTypesFactory.create("/org/apache/tika/mime/tika-mimetypes-minimal.xml");
+        this.registry = mimeTypes.getMediaTypeRegistry();
     }
 
     public void testDetection() throws Exception {
@@ -74,13 +76,38 @@ public class MimeDetectionTest extends T
                 new Metadata()));
     }
 
-    public void testAutosetSupertype() throws MimeTypeException {
-    	MimeTypes types = new MimeTypes();
-    	MimeType type = types.forName("application/something+xml");
-    	assertEquals("application/xml", type.getSuperType().getName());
-    	
-    	type = types.forName("text/something");
-    	assertEquals("text/plain", type.getSuperType().getName());
+    public void testSuperTypes() {
+        assertTrue(registry.isSpecializationOf(
+                MediaType.parse("text/something; charset=UTF-8"),
+                MediaType.parse("text/something")));
+
+        assertTrue(registry.isSpecializationOf(
+                MediaType.parse("text/something; charset=UTF-8"),
+                MediaType.TEXT_PLAIN));
+
+        assertTrue(registry.isSpecializationOf(
+                MediaType.parse("text/something; charset=UTF-8"),
+                MediaType.OCTET_STREAM));
+
+        assertTrue(registry.isSpecializationOf(
+                MediaType.parse("text/something"),
+                MediaType.TEXT_PLAIN));
+
+        assertTrue(registry.isSpecializationOf(
+                MediaType.parse("application/something+xml"),
+                MediaType.APPLICATION_XML));
+
+        assertTrue(registry.isSpecializationOf(
+                MediaType.parse("application/something+zip"),
+                MediaType.APPLICATION_ZIP));
+
+        assertTrue(registry.isSpecializationOf(
+                MediaType.APPLICATION_XML,
+                MediaType.TEXT_PLAIN));
+
+        assertTrue(registry.isSpecializationOf(
+                MediaType.parse("application/vnd.apple.keynote"),
+                MediaType.APPLICATION_ZIP));
     }
 
     private void testUrl(String expected, String url, String file) throws IOException{
@@ -92,7 +119,7 @@ public class MimeDetectionTest extends T
         InputStream in = getClass().getResourceAsStream(filename);
         testStream(expected, filename, in);
     }
-    
+
     private void testStream(String expected, String urlOrFileName, InputStream in) throws IOException{
         assertNotNull("Test stream: ["+urlOrFileName+"] is null!", in);
         if (!in.markSupported()) {

Modified: tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/MimeTypeTest.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/MimeTypeTest.java?rev=955959&r1=955958&r2=955959&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/MimeTypeTest.java (original)
+++ tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/MimeTypeTest.java Fri Jun 18 12:03:08 2010
@@ -30,18 +30,9 @@ public class MimeTypeTest extends TestCa
 
     /** Test MimeType constructor */
     public void testConstrctor() {
-
-        // Missing registry
-        try {
-            new MimeType(null, MediaType.TEXT_PLAIN);
-            fail("Expected IllegalArgumentException");
-        } catch (IllegalArgumentException e) {
-            // expected result
-        }
-
         // Missing name
         try {
-            new MimeType(types, null);
+            new MimeType(null);
             fail("Expected IllegalArgumentException");
         } catch (IllegalArgumentException e) {
             // expected result
@@ -81,14 +72,4 @@ public class MimeTypeTest extends TestCa
         }
     }
 
-    /** Test MimeType setSuperType() */
-    public void testSetSuperType() throws MimeTypeException {
-        try {
-            text.setSuperType(null);
-            fail("Expected IllegalArgumentException");
-        } catch (IllegalArgumentException e) {
-            // expected result
-        }
-    }
-
 }

Modified: tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/MimeTypesTest.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/MimeTypesTest.java?rev=955959&r1=955958&r2=955959&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/MimeTypesTest.java (original)
+++ tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/MimeTypesTest.java Fri Jun 18 12:03:08 2010
@@ -16,6 +16,9 @@
  */
 package org.apache.tika.mime;
 
+import static org.apache.tika.mime.MediaType.OCTET_STREAM;
+import static org.apache.tika.mime.MediaType.TEXT_PLAIN;
+
 import java.io.IOException;
 import java.io.InputStream;
 
@@ -25,6 +28,8 @@ public class MimeTypesTest extends TestC
 
     private MimeTypes types;
 
+    private MediaTypeRegistry registry;
+
     private MimeType binary;
 
     private MimeType text;
@@ -33,11 +38,12 @@ public class MimeTypesTest extends TestC
 
     protected void setUp() throws MimeTypeException {
         types = new MimeTypes();
+        registry = types.getMediaTypeRegistry();
         binary = types.forName("application/octet-stream");
         text = types.forName("text/plain");
-        text.addAlias(MediaType.parse("text/x-plain"));
+        types.addAlias(text, MediaType.parse("text/x-plain"));
         html = types.forName("text/html");
-        html.setSuperType(text);
+        types.setSuperType(html, TEXT_PLAIN);
     }
 
     public void testForName() throws MimeTypeException {
@@ -53,52 +59,38 @@ public class MimeTypesTest extends TestC
     }
 
     public void testSuperType() throws MimeTypeException {
-        assertNull(binary.getSuperType());
-        assertEquals(binary, text.getSuperType());
-        assertEquals(text, html.getSuperType());
+        assertNull(registry.getSuperType(OCTET_STREAM));
+        assertEquals(OCTET_STREAM, registry.getSuperType(TEXT_PLAIN));
+        assertEquals(TEXT_PLAIN, registry.getSuperType(html.getType()));
    }
 
     public void testIsDescendantOf() {
-        assertFalse(binary.isDescendantOf(binary));
-        assertFalse(text.isDescendantOf(text));
-        assertFalse(html.isDescendantOf(html));
-
-        assertTrue(text.isDescendantOf(binary));
-        assertFalse(binary.isDescendantOf(text));
-        
-        assertTrue(html.isDescendantOf(binary));
-        assertFalse(binary.isDescendantOf(html));
+        assertFalse(registry.isSpecializationOf(OCTET_STREAM, OCTET_STREAM));
+        assertFalse(registry.isSpecializationOf(TEXT_PLAIN, TEXT_PLAIN));
+        assertFalse(registry.isSpecializationOf(html.getType(), html.getType()));
 
-        assertTrue(html.isDescendantOf(text));
-        assertFalse(text.isDescendantOf(html));
+        assertTrue(registry.isSpecializationOf(html.getType(), OCTET_STREAM));
+        assertFalse(registry.isSpecializationOf(OCTET_STREAM, html.getType()));
 
-        try {
-            binary.isDescendantOf(null);
-            fail("Expected IllegalArgumentException");
-        } catch (IllegalArgumentException e) {
-            // expected result
-        }
+        assertTrue(registry.isSpecializationOf(html.getType(), TEXT_PLAIN));
+        assertFalse(registry.isSpecializationOf(TEXT_PLAIN, html.getType()));
+
+        assertTrue(registry.isSpecializationOf(TEXT_PLAIN, OCTET_STREAM));
+        assertFalse(registry.isSpecializationOf(OCTET_STREAM, TEXT_PLAIN));
     }
 
     public void testCompareTo() {
         assertTrue(binary.compareTo(binary) == 0);
-        assertTrue(binary.compareTo(text) < 0);
-        assertTrue(binary.compareTo(html) < 0);
+        assertTrue(binary.compareTo(text) != 0);
+        assertTrue(binary.compareTo(html) != 0);
 
-        assertTrue(text.compareTo(binary) > 0);
+        assertTrue(text.compareTo(binary) != 0);
         assertTrue(text.compareTo(text) == 0);
-        assertTrue(text.compareTo(html) < 0);
+        assertTrue(text.compareTo(html) != 0);
 
-        assertTrue(html.compareTo(binary) > 0);
-        assertTrue(html.compareTo(text) > 0);
+        assertTrue(html.compareTo(binary) != 0);
+        assertTrue(html.compareTo(text) != 0);
         assertTrue(html.compareTo(html) == 0);
-
-        try {
-            binary.compareTo(null);
-            fail("Expected IllegalArgumentException");
-        } catch (IllegalArgumentException e) {
-            // expected result
-        }
     }
 
     /** Test getMimeType(byte[]) */

Modified: tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/PatternsTest.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/PatternsTest.java?rev=955959&r1=955958&r2=955959&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/PatternsTest.java (original)
+++ tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/PatternsTest.java Fri Jun 18 12:03:08 2010
@@ -16,9 +16,6 @@
  */
 package org.apache.tika.mime;
 
-import java.io.IOException;
-import java.io.InputStream;
-
 import junit.framework.TestCase;
 
 /**
@@ -33,7 +30,7 @@ public class PatternsTest extends TestCa
     private MimeType text;
 
     protected void setUp() throws MimeTypeException {
-        patterns = new Patterns();
+        patterns = new Patterns(new MediaTypeRegistry());
         types = new MimeTypes();
         text = types.forName("text/plain");
     }

Modified: tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/TestMimeTypes.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/TestMimeTypes.java?rev=955959&r1=955958&r2=955959&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/TestMimeTypes.java (original)
+++ tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/TestMimeTypes.java Fri Jun 18 12:03:08 2010
@@ -257,7 +257,7 @@ public class TestMimeTypes extends TestC
      * @since TIKA-194
      */
     public void testJavaRegex() throws Exception{
-        MimeType testType = new MimeType(this.repo, MediaType.parse("foo/bar"));
+        MimeType testType = new MimeType(MediaType.parse("foo/bar"));
         this.repo.add(testType);
         assertNotNull(repo.forName("foo/bar"));
         String pattern = "rtg_sst_grb_0\\.5\\.\\d{8}";
@@ -265,8 +265,8 @@ public class TestMimeTypes extends TestC
         String testFileName = "rtg_sst_grb_0.5.12345678";
         assertNotNull(this.repo.getMimeType(testFileName));
         assertEquals(this.repo.getMimeType(testFileName).getName(), "foo/bar");
-        
-        MimeType testType2 = new MimeType(this.repo, MediaType.parse("foo/bar2"));
+
+        MimeType testType2 = new MimeType(MediaType.parse("foo/bar2"));
         this.repo.add(testType2);
         assertNotNull(repo.forName("foo/bar2"));
         this.repo.addPattern(testType2, pattern, false);