You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ju...@apache.org on 2007/11/05 23:54:27 UTC

svn commit: r592173 - in /incubator/tika/trunk/src: main/java/org/apache/tika/mime/ main/resources/mime/ test/java/org/apache/tika/mime/

Author: jukka
Date: Mon Nov  5 14:54:26 2007
New Revision: 592173

URL: http://svn.apache.org/viewvc?rev=592173&view=rev
Log:
TIKA-87 - MimeTypes should allow modification of MIME types
    - MimeType.setSuperType(MimeType) can now be used to modify inheritance

Modified:
    incubator/tika/trunk/src/main/java/org/apache/tika/mime/Magic.java
    incubator/tika/trunk/src/main/java/org/apache/tika/mime/MimeType.java
    incubator/tika/trunk/src/main/java/org/apache/tika/mime/MimeTypes.java
    incubator/tika/trunk/src/main/java/org/apache/tika/mime/MimeTypesReader.java
    incubator/tika/trunk/src/main/resources/mime/tika-mimetypes.xml
    incubator/tika/trunk/src/test/java/org/apache/tika/mime/MimeTypesTest.java

Modified: incubator/tika/trunk/src/main/java/org/apache/tika/mime/Magic.java
URL: http://svn.apache.org/viewvc/incubator/tika/trunk/src/main/java/org/apache/tika/mime/Magic.java?rev=592173&r1=592172&r2=592173&view=diff
==============================================================================
--- incubator/tika/trunk/src/main/java/org/apache/tika/mime/Magic.java (original)
+++ incubator/tika/trunk/src/main/java/org/apache/tika/mime/Magic.java Mon Nov  5 14:54:26 2007
@@ -69,9 +69,9 @@
     }
 
     public int compareTo(Magic o) {
-        int diff = priority - o.priority;
+        int diff = o.priority - priority;
         if (diff == 0) {
-            diff = size() - o.size();
+            diff = o.size() - size();
         }
         return diff;
     }

Modified: incubator/tika/trunk/src/main/java/org/apache/tika/mime/MimeType.java
URL: http://svn.apache.org/viewvc/incubator/tika/trunk/src/main/java/org/apache/tika/mime/MimeType.java?rev=592173&r1=592172&r2=592173&view=diff
==============================================================================
--- incubator/tika/trunk/src/main/java/org/apache/tika/mime/MimeType.java (original)
+++ incubator/tika/trunk/src/main/java/org/apache/tika/mime/MimeType.java Mon Nov  5 14:54:26 2007
@@ -18,6 +18,7 @@
 
 // JDK imports
 import java.util.ArrayList;
+import java.util.Collections;
 import java.util.SortedSet;
 import java.util.TreeSet;
 import java.util.regex.Pattern;
@@ -75,8 +76,21 @@
      */
     private final String name;
 
-    /** The Mime-Type description */
-    private String description = null;
+    /**
+     * Description of this media type.
+     */
+    private String description = "";
+
+    /**
+     * The parent type of this media type, or <code>null</code> if this
+     * is a top-level type.
+     */
+    private MimeType superType = null;
+
+    /**
+     * The child types of this media type.
+     */
+    private final SortedSet<MimeType> subTypes = new TreeSet<MimeType>();
 
     /** The Mime-Type associated recognition patterns */
     private final Patterns patterns = new Patterns();
@@ -92,12 +106,6 @@
     /** The root-XML associated to this Mime-Type */
     private final ArrayList<RootXML> rootXML = new ArrayList<RootXML>();
 
-    /** The sub-class-of associated to this Mime-Type */
-    private final ArrayList<String> superTypes = new ArrayList<String>();
-
-    /** The mime-type level (regarding its subTypes) */
-    private int level = 0;
-
     /** The minimum length of data to provides for magic analyzis */
     private int minLength = 0;
 
@@ -105,7 +113,7 @@
      * Creates a media type with the give name and containing media type
      * registry. The name is expected to be valid and normalized to lower
      * case. This constructor should only be called by
-     * {@link MimeTypes#forName(String)} to keep the media type mapping
+     * {@link MimeTypes#forName(String)} to keep the media type registry
      * up to date.
      *
      * @param registry the media type registry that contains this type
@@ -119,7 +127,7 @@
     }
 
     /**
-     * Returns the name of this Internet media type.
+     * Returns the name of this media type.
      * 
      * @return media type name (lower case)
      */
@@ -128,21 +136,69 @@
     }
 
     /**
-     * Return the description of this mime-type.
+     * Returns the parent of this media type.
+     *
+     * @return parent media type, or <code>null</code>
+     */
+    public MimeType getSuperType() {
+        return superType;
+    }
+
+    public void setSuperType(MimeType type) throws MimeTypeException {
+        assert type != null && type.registry == registry;
+        if (this.isDescendantOf(type)) {
+            // ignore, already a descendant of the given type
+        } else if (this == type) {
+            throw new MimeTypeException(
+                    "Media type can not inherit itself: " + type);
+        } else if (type.isDescendantOf(this)) {
+            throw new MimeTypeException(
+                    "Media type can not inherit its descendant: " + type);
+        } else if (superType == null) {
+            superType = type;
+            superType.subTypes.add(this);
+        } else if (type.isDescendantOf(superType)) {
+            superType.subTypes.remove(this);
+            superType = type;
+            superType.subTypes.add(this);
+        } else {
+            throw new MimeTypeException(
+                    "Conflicting media type inheritance: " + type);
+        }
+    }
+
+    public SortedSet<MimeType> getSubTypes() {
+        return Collections.unmodifiableSortedSet(subTypes);
+    }
+
+    public boolean isDescendantOf(MimeType type) {
+        assert type != null;
+        synchronized (registry) {
+            for (MimeType t = superType; t != null; t = t.superType) {
+                if (t == type) {
+                    return true;
+                }
+            }
+            return false; 
+        }
+    }
+
+    /**
+     * Returns the description of this media type.
      * 
-     * @return the description of this mime-type.
+     * @return media type description
      */
     public String getDescription() {
         return description;
     }
 
     /**
-     * Set the description of this mime-type.
+     * Set the description of this media type.
      * 
-     * @param description
-     *            the description of this mime-type.
+     * @param description media type description
      */
-    void setDescription(String description) {
+    public void setDescription(String description) {
+        assert description != null;
         this.description = description;
     }
 
@@ -151,7 +207,7 @@
      *
      * @param pattern file name pattern
      */
-    public synchronized void addPattern(String pattern) {
+    public void addPattern(String pattern) {
         registry.addPattern(this, pattern);
         patterns.add(pattern, this);
     }
@@ -161,7 +217,7 @@
      * 
      * @return file name patterns
      */
-    public synchronized String[] getPatterns() {
+    public String[] getPatterns() {
         return patterns.getPatterns();
     }
 
@@ -171,18 +227,18 @@
      *
      * @return media type aliases
      */
-    public synchronized SortedSet<String> getAliases() {
-        return new TreeSet<String>(aliases);
+    public SortedSet<String> getAliases() {
+        return Collections.unmodifiableSortedSet(aliases);
     }
 
     /**
      * Adds an alias name for this media type.
      *
      * @param alias media type alias (case insensitive)
-     * @throws MimeTypeException if the alias is invalid
-     *                           or already registered for another media type
+     * @throws MimeTypeException if the alias is invalid or
+     *                           already registered for another media type
      */
-    public synchronized void addAlias(String alias) throws MimeTypeException {
+    public void addAlias(String alias) throws MimeTypeException {
         if (isValid(alias)) {
             alias = alias.toLowerCase();
             if (!name.equals(alias) && !aliases.contains(alias)) {
@@ -224,31 +280,6 @@
         return rootXML.toArray(new RootXML[rootXML.size()]);
     }
 
-    void addSuperType(String type) {
-        superTypes.add(type);
-    }
-
-    boolean hasSuperType() {
-        return (superTypes.size() > 0);
-    }
-
-    /**
-     * Returns the super types of this mime-type. A type is a super type of
-     * another type if any instance of the second type is also an instance of
-     * the first.
-     */
-    public String[] getSuperTypes() {
-        return superTypes.toArray(new String[superTypes.size()]);
-    }
-
-    int getLevel() {
-        return level;
-    }
-
-    void setLevel(int level) {
-        this.level = level;
-    }
-
     Magic[] getMagics() {
         return magics.toArray(new Magic[magics.size()]);
     }
@@ -354,18 +385,27 @@
 
     //----------------------------------------------------------< Comparable >
 
-    public int compareTo(MimeType o) {
-        int diff = level - o.level;
-        if (diff == 0) {
-            diff = name.compareTo(o.name);
+    public int compareTo(MimeType type) {
+        assert type != null;
+        if (type == this) {
+            return 0;
+        } else if (this.isDescendantOf(type)) {
+            return 1;
+        } else if (type.isDescendantOf(this)) {
+            return -1;
+        } else if (superType != null) {
+            return superType.compareTo(type);
+        } else if (type.superType != null) {
+            return compareTo(type.superType);
+        } else {
+            return name.compareTo(type.name);
         }
-        return diff;
     }
 
     //--------------------------------------------------------------< Object >
 
     /**
-     * Returns the name of this Internet media type.
+     * Returns the name of this media type.
      *
      * @return media type name
      */

Modified: incubator/tika/trunk/src/main/java/org/apache/tika/mime/MimeTypes.java
URL: http://svn.apache.org/viewvc/incubator/tika/trunk/src/main/java/org/apache/tika/mime/MimeTypes.java?rev=592173&r1=592172&r2=592173&view=diff
==============================================================================
--- incubator/tika/trunk/src/main/java/org/apache/tika/mime/MimeTypes.java (original)
+++ incubator/tika/trunk/src/main/java/org/apache/tika/mime/MimeTypes.java Mon Nov  5 14:54:26 2007
@@ -24,8 +24,6 @@
 import java.util.Arrays;
 import java.util.Map;
 import java.util.HashMap;
-import java.util.ArrayList;
-import java.util.List;
 import java.util.SortedSet;
 import java.util.TreeSet;
 
@@ -62,12 +60,6 @@
     /** List of all registered rootXML */
     private SortedSet<MimeType> xmls = new TreeSet<MimeType>();
 
-    private Map<String, List<MimeType>> unsolvedDeps =
-        new HashMap<String, List<MimeType>>();
-
-    /** The minimum length of data to provide to check all MimeTypes */
-    private int minLength = 0;
-
     public MimeTypes() {
         root = new MimeType(this, DEFAULT);
         types.put(root.getName(), root);
@@ -287,6 +279,7 @@
             MimeType type = types.get(name);
             if (type == null) {
                 type = new MimeType(this, name);
+                type.setSuperType(root);
                 types.put(name, type);
             }
             return type;
@@ -320,7 +313,7 @@
      * @param type media type
      * @param pattern file name pattern
      */
-    synchronized void addPattern(MimeType type, String pattern) {
+    void addPattern(MimeType type, String pattern) {
         patterns.add(pattern, type);
     }
 
@@ -359,37 +352,6 @@
      *            is the mime-type to add.
      */
     void add(MimeType type) {
-        if (type == null) {
-            return;
-        }
-
-        // Add the new type in the repository
-        types.put(type.getName(), type);
-
-        // Checks for some unsolved dependencies on this new type
-        List<MimeType> deps = unsolvedDeps.remove(type.getName());
-        if (deps != null) {
-            int level = type.getLevel();
-            for (MimeType dep : deps) {
-                level = Math.max(level, dep.getLevel() + 1);
-            }
-            type.setLevel(level);
-        }
-
-        for (String name : type.getSuperTypes()) {
-            MimeType superType = types.get(name);
-            if (superType == null) {
-                deps = unsolvedDeps.get(name);
-                if (deps == null) {
-                    deps = new ArrayList<MimeType>();
-                    unsolvedDeps.put(name, deps);
-                }
-                deps.add(type);
-            }
-        }
-
-        // Update minLentgth
-        minLength = Math.max(minLength, type.getMinLength());
         // Update the magics index...
         if (type.hasMagic()) {
             magics.addAll(Arrays.asList(type.getMagics()));

Modified: incubator/tika/trunk/src/main/java/org/apache/tika/mime/MimeTypesReader.java
URL: http://svn.apache.org/viewvc/incubator/tika/trunk/src/main/java/org/apache/tika/mime/MimeTypesReader.java?rev=592173&r1=592172&r2=592173&view=diff
==============================================================================
--- incubator/tika/trunk/src/main/java/org/apache/tika/mime/MimeTypesReader.java (original)
+++ incubator/tika/trunk/src/main/java/org/apache/tika/mime/MimeTypesReader.java Mon Nov  5 14:54:26 2007
@@ -31,7 +31,6 @@
 
 // JDK imports
 import java.io.InputStream;
-import java.util.ArrayList;
 import javax.xml.parsers.DocumentBuilder;
 import javax.xml.parsers.DocumentBuilderFactory;
 
@@ -173,10 +172,17 @@
                     } else if (nodeElement.getTagName().equals("root-XML")) {
                         readRootXML(nodeElement, type);
                     } else if (nodeElement.getTagName().equals("sub-class-of")) {
-                        readSubClassOf(nodeElement, type);
+                        String parent = nodeElement.getAttribute("type");
+                        try {
+                            type.setSuperType(types.forName(parent));
+                        } catch (MimeTypeException e) {
+                            logger.warn("Invalid parent type: " + parent, e);
+                        }
                     }
                 }
             }
+
+            types.add(type);
         } catch (MimeTypeException e) {
             logger.warn("Invalid media type configuration entry: " + name, e);
         }
@@ -268,15 +274,8 @@
 
     /** Read Element named root-XML. */
     private void readRootXML(Element element, MimeType mimeType) {
-
         mimeType.addRootXML(element.getAttribute("namespaceURI"), element
                 .getAttribute("localName"));
-    }
-
-    /** Read Element named sub-class-of. */
-    private void readSubClassOf(Element element, MimeType mimeType) {
-
-        mimeType.addSuperType(element.getAttribute("type"));
     }
 
 }

Modified: incubator/tika/trunk/src/main/resources/mime/tika-mimetypes.xml
URL: http://svn.apache.org/viewvc/incubator/tika/trunk/src/main/resources/mime/tika-mimetypes.xml?rev=592173&r1=592172&r2=592173&view=diff
==============================================================================
--- incubator/tika/trunk/src/main/resources/mime/tika-mimetypes.xml (original)
+++ incubator/tika/trunk/src/main/resources/mime/tika-mimetypes.xml Mon Nov  5 14:54:26 2007
@@ -148,7 +148,7 @@
 	</mime-type>
 
 	<mime-type type="application/xhtml+xml">
-		<sub-class-of type="text/xml" />
+		<sub-class-of type="application/xml" />
 		<glob pattern="*.xhtml" />
 		<root-XML namespaceURI='http://www.w3.org/1999/xhtml'
 			localName='html' />

Modified: incubator/tika/trunk/src/test/java/org/apache/tika/mime/MimeTypesTest.java
URL: http://svn.apache.org/viewvc/incubator/tika/trunk/src/test/java/org/apache/tika/mime/MimeTypesTest.java?rev=592173&r1=592172&r2=592173&view=diff
==============================================================================
--- incubator/tika/trunk/src/test/java/org/apache/tika/mime/MimeTypesTest.java (original)
+++ incubator/tika/trunk/src/test/java/org/apache/tika/mime/MimeTypesTest.java Mon Nov  5 14:54:26 2007
@@ -22,14 +22,24 @@
 
     private MimeTypes types;
 
-    protected void setUp() {
+    private MimeType binary;
+
+    private MimeType text;
+
+    private MimeType html;
+
+    protected void setUp() throws MimeTypeException {
         types = new MimeTypes();
+        binary = types.forName("application/octet-stream");
+        text = types.forName("text/plain");
+        text.addAlias("text/x-plain");
+        html = types.forName("text/html");
+        html.setSuperType(text);
     }
 
     public void testForName() throws MimeTypeException {
-        assertNotNull(types.forName("text/plain"));
-        assertEquals("text/plain", types.forName("text/plain").getName());
-        assertEquals("text/plain", types.forName("TEXT/PLAIN").getName());
+        assertEquals(text, types.forName("text/plain"));
+        assertEquals(text, types.forName("TEXT/PLAIN"));
 
         try {
             types.forName("invalid");
@@ -39,16 +49,61 @@
         }
     }
 
-    public void addAlias() throws MimeTypeException {
-        types.addAlias(types.forName("text/plain"), "foo/bar");
-        assertNotNull(types.forName("foo/bar"));
-        assertEquals("text/plain", types.forName("foo/bar").getName());
-
+    public void testAddAlias() throws MimeTypeException {
+        assertEquals(text, types.forName("text/x-plain"));
         try {
-            types.addAlias(types.forName("text/plain"), "invalid");
+            text.addAlias("invalid");
             fail("MimeTypeException not thrown on invalid alias name");
         } catch (MimeTypeException e) {
             // expected
         }
     }
+
+    public void testSuperType() throws MimeTypeException {
+        assertNull(binary.getSuperType());
+        assertEquals(binary, text.getSuperType());
+        assertEquals(text, html.getSuperType());
+   }
+
+    public void testSubTypes() {
+        assertEquals(1, binary.getSubTypes().size());
+        assertEquals(
+                "text/plain",
+                binary.getSubTypes().iterator().next().getName());
+        assertEquals(1, text.getSubTypes().size());
+        assertEquals(
+                "text/html",
+                text.getSubTypes().iterator().next().getName());
+        assertEquals(0, html.getSubTypes().size());
+    }
+
+    public void testIsDescendantOf() {
+        assertFalse(binary.isDescendantOf(binary));
+        assertFalse(text.isDescendantOf(text));
+        assertFalse(html.isDescendantOf(html));
+
+        assertTrue(text.isDescendantOf(binary));
+        assertFalse(binary.isDescendantOf(text));
+        
+        assertTrue(html.isDescendantOf(binary));
+        assertFalse(binary.isDescendantOf(html));
+
+        assertTrue(html.isDescendantOf(text));
+        assertFalse(text.isDescendantOf(html));
+    }
+
+    public void testCompareTo() {
+        assertTrue(binary.compareTo(binary) == 0);
+        assertTrue(binary.compareTo(text) < 0);
+        assertTrue(binary.compareTo(html) < 0);
+
+        assertTrue(text.compareTo(binary) > 0);
+        assertTrue(text.compareTo(text) == 0);
+        assertTrue(text.compareTo(html) < 0);
+
+        assertTrue(html.compareTo(binary) > 0);
+        assertTrue(html.compareTo(text) > 0);
+        assertTrue(html.compareTo(html) == 0);
+    }
+
 }