You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ju...@apache.org on 2007/11/05 23:54:27 UTC
svn commit: r592173 - in /incubator/tika/trunk/src:
main/java/org/apache/tika/mime/ main/resources/mime/
test/java/org/apache/tika/mime/
Author: jukka
Date: Mon Nov 5 14:54:26 2007
New Revision: 592173
URL: http://svn.apache.org/viewvc?rev=592173&view=rev
Log:
TIKA-87 - MimeTypes should allow modification of MIME types
- MimeType.setSuperType(MimeType) can now be used to modify inheritance
Modified:
incubator/tika/trunk/src/main/java/org/apache/tika/mime/Magic.java
incubator/tika/trunk/src/main/java/org/apache/tika/mime/MimeType.java
incubator/tika/trunk/src/main/java/org/apache/tika/mime/MimeTypes.java
incubator/tika/trunk/src/main/java/org/apache/tika/mime/MimeTypesReader.java
incubator/tika/trunk/src/main/resources/mime/tika-mimetypes.xml
incubator/tika/trunk/src/test/java/org/apache/tika/mime/MimeTypesTest.java
Modified: incubator/tika/trunk/src/main/java/org/apache/tika/mime/Magic.java
URL: http://svn.apache.org/viewvc/incubator/tika/trunk/src/main/java/org/apache/tika/mime/Magic.java?rev=592173&r1=592172&r2=592173&view=diff
==============================================================================
--- incubator/tika/trunk/src/main/java/org/apache/tika/mime/Magic.java (original)
+++ incubator/tika/trunk/src/main/java/org/apache/tika/mime/Magic.java Mon Nov 5 14:54:26 2007
@@ -69,9 +69,9 @@
}
public int compareTo(Magic o) {
- int diff = priority - o.priority;
+ int diff = o.priority - priority;
if (diff == 0) {
- diff = size() - o.size();
+ diff = o.size() - size();
}
return diff;
}
Modified: incubator/tika/trunk/src/main/java/org/apache/tika/mime/MimeType.java
URL: http://svn.apache.org/viewvc/incubator/tika/trunk/src/main/java/org/apache/tika/mime/MimeType.java?rev=592173&r1=592172&r2=592173&view=diff
==============================================================================
--- incubator/tika/trunk/src/main/java/org/apache/tika/mime/MimeType.java (original)
+++ incubator/tika/trunk/src/main/java/org/apache/tika/mime/MimeType.java Mon Nov 5 14:54:26 2007
@@ -18,6 +18,7 @@
// JDK imports
import java.util.ArrayList;
+import java.util.Collections;
import java.util.SortedSet;
import java.util.TreeSet;
import java.util.regex.Pattern;
@@ -75,8 +76,21 @@
*/
private final String name;
- /** The Mime-Type description */
- private String description = null;
+ /**
+ * Description of this media type.
+ */
+ private String description = "";
+
+ /**
+ * The parent type of this media type, or <code>null</code> if this
+ * is a top-level type.
+ */
+ private MimeType superType = null;
+
+ /**
+ * The child types of this media type.
+ */
+ private final SortedSet<MimeType> subTypes = new TreeSet<MimeType>();
/** The Mime-Type associated recognition patterns */
private final Patterns patterns = new Patterns();
@@ -92,12 +106,6 @@
/** The root-XML associated to this Mime-Type */
private final ArrayList<RootXML> rootXML = new ArrayList<RootXML>();
- /** The sub-class-of associated to this Mime-Type */
- private final ArrayList<String> superTypes = new ArrayList<String>();
-
- /** The mime-type level (regarding its subTypes) */
- private int level = 0;
-
/** The minimum length of data to provide for magic analysis */
private int minLength = 0;
@@ -105,7 +113,7 @@
* Creates a media type with the given name and containing media type
* registry. The name is expected to be valid and normalized to lower
* case. This constructor should only be called by
- * {@link MimeTypes#forName(String)} to keep the media type mapping
+ * {@link MimeTypes#forName(String)} to keep the media type registry
* up to date.
*
* @param registry the media type registry that contains this type
@@ -119,7 +127,7 @@
}
/**
- * Returns the name of this Internet media type.
+ * Returns the name of this media type.
*
* @return media type name (lower case)
*/
@@ -128,21 +136,69 @@
}
/**
- * Return the description of this mime-type.
+ * Returns the parent of this media type.
+ *
+ * @return parent media type, or <code>null</code>
+ */
+ public MimeType getSuperType() {
+ return superType;
+ }
+
+ public void setSuperType(MimeType type) throws MimeTypeException {
+ assert type != null && type.registry == registry;
+ if (this.isDescendantOf(type)) {
+ // ignore, already a descendant of the given type
+ } else if (this == type) {
+ throw new MimeTypeException(
+ "Media type can not inherit itself: " + type);
+ } else if (type.isDescendantOf(this)) {
+ throw new MimeTypeException(
+ "Media type can not inherit its descendant: " + type);
+ } else if (superType == null) {
+ superType = type;
+ superType.subTypes.add(this);
+ } else if (type.isDescendantOf(superType)) {
+ superType.subTypes.remove(this);
+ superType = type;
+ superType.subTypes.add(this);
+ } else {
+ throw new MimeTypeException(
+ "Conflicting media type inheritance: " + type);
+ }
+ }
+
+ public SortedSet<MimeType> getSubTypes() {
+ return Collections.unmodifiableSortedSet(subTypes);
+ }
+
+ public boolean isDescendantOf(MimeType type) {
+ assert type != null;
+ synchronized (registry) {
+ for (MimeType t = superType; t != null; t = t.superType) {
+ if (t == type) {
+ return true;
+ }
+ }
+ return false;
+ }
+ }
+
+ /**
+ * Returns the description of this media type.
*
- * @return the description of this mime-type.
+ * @return media type description
*/
public String getDescription() {
return description;
}
/**
- * Set the description of this mime-type.
+ * Set the description of this media type.
*
- * @param description
- * the description of this mime-type.
+ * @param description media type description
*/
- void setDescription(String description) {
+ public void setDescription(String description) {
+ assert description != null;
this.description = description;
}
@@ -151,7 +207,7 @@
*
* @param pattern file name pattern
*/
- public synchronized void addPattern(String pattern) {
+ public void addPattern(String pattern) {
registry.addPattern(this, pattern);
patterns.add(pattern, this);
}
@@ -161,7 +217,7 @@
*
* @return file name patterns
*/
- public synchronized String[] getPatterns() {
+ public String[] getPatterns() {
return patterns.getPatterns();
}
@@ -171,18 +227,18 @@
*
* @return media type aliases
*/
- public synchronized SortedSet<String> getAliases() {
- return new TreeSet<String>(aliases);
+ public SortedSet<String> getAliases() {
+ return Collections.unmodifiableSortedSet(aliases);
}
/**
* Adds an alias name for this media type.
*
* @param alias media type alias (case insensitive)
- * @throws MimeTypeException if the alias is invalid
- * or already registered for another media type
+ * @throws MimeTypeException if the alias is invalid or
+ * already registered for another media type
*/
- public synchronized void addAlias(String alias) throws MimeTypeException {
+ public void addAlias(String alias) throws MimeTypeException {
if (isValid(alias)) {
alias = alias.toLowerCase();
if (!name.equals(alias) && !aliases.contains(alias)) {
@@ -224,31 +280,6 @@
return rootXML.toArray(new RootXML[rootXML.size()]);
}
- void addSuperType(String type) {
- superTypes.add(type);
- }
-
- boolean hasSuperType() {
- return (superTypes.size() > 0);
- }
-
- /**
- * Returns the super types of this mime-type. A type is a super type of
- * another type if any instance of the second type is also an instance of
- * the first.
- */
- public String[] getSuperTypes() {
- return superTypes.toArray(new String[superTypes.size()]);
- }
-
- int getLevel() {
- return level;
- }
-
- void setLevel(int level) {
- this.level = level;
- }
-
Magic[] getMagics() {
return magics.toArray(new Magic[magics.size()]);
}
@@ -354,18 +385,27 @@
//----------------------------------------------------------< Comparable >
- public int compareTo(MimeType o) {
- int diff = level - o.level;
- if (diff == 0) {
- diff = name.compareTo(o.name);
+ public int compareTo(MimeType type) {
+ assert type != null;
+ if (type == this) {
+ return 0;
+ } else if (this.isDescendantOf(type)) {
+ return 1;
+ } else if (type.isDescendantOf(this)) {
+ return -1;
+ } else if (superType != null) {
+ return superType.compareTo(type);
+ } else if (type.superType != null) {
+ return compareTo(type.superType);
+ } else {
+ return name.compareTo(type.name);
}
- return diff;
}
//--------------------------------------------------------------< Object >
/**
- * Returns the name of this Internet media type.
+ * Returns the name of this media type.
*
* @return media type name
*/
Modified: incubator/tika/trunk/src/main/java/org/apache/tika/mime/MimeTypes.java
URL: http://svn.apache.org/viewvc/incubator/tika/trunk/src/main/java/org/apache/tika/mime/MimeTypes.java?rev=592173&r1=592172&r2=592173&view=diff
==============================================================================
--- incubator/tika/trunk/src/main/java/org/apache/tika/mime/MimeTypes.java (original)
+++ incubator/tika/trunk/src/main/java/org/apache/tika/mime/MimeTypes.java Mon Nov 5 14:54:26 2007
@@ -24,8 +24,6 @@
import java.util.Arrays;
import java.util.Map;
import java.util.HashMap;
-import java.util.ArrayList;
-import java.util.List;
import java.util.SortedSet;
import java.util.TreeSet;
@@ -62,12 +60,6 @@
/** List of all registered rootXML */
private SortedSet<MimeType> xmls = new TreeSet<MimeType>();
- private Map<String, List<MimeType>> unsolvedDeps =
- new HashMap<String, List<MimeType>>();
-
- /** The minimum length of data to provide to check all MimeTypes */
- private int minLength = 0;
-
public MimeTypes() {
root = new MimeType(this, DEFAULT);
types.put(root.getName(), root);
@@ -287,6 +279,7 @@
MimeType type = types.get(name);
if (type == null) {
type = new MimeType(this, name);
+ type.setSuperType(root);
types.put(name, type);
}
return type;
@@ -320,7 +313,7 @@
* @param type media type
* @param pattern file name pattern
*/
- synchronized void addPattern(MimeType type, String pattern) {
+ void addPattern(MimeType type, String pattern) {
patterns.add(pattern, type);
}
@@ -359,37 +352,6 @@
* is the mime-type to add.
*/
void add(MimeType type) {
- if (type == null) {
- return;
- }
-
- // Add the new type in the repository
- types.put(type.getName(), type);
-
- // Checks for some unsolved dependencies on this new type
- List<MimeType> deps = unsolvedDeps.remove(type.getName());
- if (deps != null) {
- int level = type.getLevel();
- for (MimeType dep : deps) {
- level = Math.max(level, dep.getLevel() + 1);
- }
- type.setLevel(level);
- }
-
- for (String name : type.getSuperTypes()) {
- MimeType superType = types.get(name);
- if (superType == null) {
- deps = unsolvedDeps.get(name);
- if (deps == null) {
- deps = new ArrayList<MimeType>();
- unsolvedDeps.put(name, deps);
- }
- deps.add(type);
- }
- }
-
- // Update minLentgth
- minLength = Math.max(minLength, type.getMinLength());
// Update the magics index...
if (type.hasMagic()) {
magics.addAll(Arrays.asList(type.getMagics()));
Modified: incubator/tika/trunk/src/main/java/org/apache/tika/mime/MimeTypesReader.java
URL: http://svn.apache.org/viewvc/incubator/tika/trunk/src/main/java/org/apache/tika/mime/MimeTypesReader.java?rev=592173&r1=592172&r2=592173&view=diff
==============================================================================
--- incubator/tika/trunk/src/main/java/org/apache/tika/mime/MimeTypesReader.java (original)
+++ incubator/tika/trunk/src/main/java/org/apache/tika/mime/MimeTypesReader.java Mon Nov 5 14:54:26 2007
@@ -31,7 +31,6 @@
// JDK imports
import java.io.InputStream;
-import java.util.ArrayList;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
@@ -173,10 +172,17 @@
} else if (nodeElement.getTagName().equals("root-XML")) {
readRootXML(nodeElement, type);
} else if (nodeElement.getTagName().equals("sub-class-of")) {
- readSubClassOf(nodeElement, type);
+ String parent = nodeElement.getAttribute("type");
+ try {
+ type.setSuperType(types.forName(parent));
+ } catch (MimeTypeException e) {
+ logger.warn("Invalid parent type: " + parent, e);
+ }
}
}
}
+
+ types.add(type);
} catch (MimeTypeException e) {
logger.warn("Invalid media type configuration entry: " + name, e);
}
@@ -268,15 +274,8 @@
/** Read Element named root-XML. */
private void readRootXML(Element element, MimeType mimeType) {
-
mimeType.addRootXML(element.getAttribute("namespaceURI"), element
.getAttribute("localName"));
- }
-
- /** Read Element named sub-class-of. */
- private void readSubClassOf(Element element, MimeType mimeType) {
-
- mimeType.addSuperType(element.getAttribute("type"));
}
}
Modified: incubator/tika/trunk/src/main/resources/mime/tika-mimetypes.xml
URL: http://svn.apache.org/viewvc/incubator/tika/trunk/src/main/resources/mime/tika-mimetypes.xml?rev=592173&r1=592172&r2=592173&view=diff
==============================================================================
--- incubator/tika/trunk/src/main/resources/mime/tika-mimetypes.xml (original)
+++ incubator/tika/trunk/src/main/resources/mime/tika-mimetypes.xml Mon Nov 5 14:54:26 2007
@@ -148,7 +148,7 @@
</mime-type>
<mime-type type="application/xhtml+xml">
- <sub-class-of type="text/xml" />
+ <sub-class-of type="application/xml" />
<glob pattern="*.xhtml" />
<root-XML namespaceURI='http://www.w3.org/1999/xhtml'
localName='html' />
Modified: incubator/tika/trunk/src/test/java/org/apache/tika/mime/MimeTypesTest.java
URL: http://svn.apache.org/viewvc/incubator/tika/trunk/src/test/java/org/apache/tika/mime/MimeTypesTest.java?rev=592173&r1=592172&r2=592173&view=diff
==============================================================================
--- incubator/tika/trunk/src/test/java/org/apache/tika/mime/MimeTypesTest.java (original)
+++ incubator/tika/trunk/src/test/java/org/apache/tika/mime/MimeTypesTest.java Mon Nov 5 14:54:26 2007
@@ -22,14 +22,24 @@
private MimeTypes types;
- protected void setUp() {
+ private MimeType binary;
+
+ private MimeType text;
+
+ private MimeType html;
+
+ protected void setUp() throws MimeTypeException {
types = new MimeTypes();
+ binary = types.forName("application/octet-stream");
+ text = types.forName("text/plain");
+ text.addAlias("text/x-plain");
+ html = types.forName("text/html");
+ html.setSuperType(text);
}
public void testForName() throws MimeTypeException {
- assertNotNull(types.forName("text/plain"));
- assertEquals("text/plain", types.forName("text/plain").getName());
- assertEquals("text/plain", types.forName("TEXT/PLAIN").getName());
+ assertEquals(text, types.forName("text/plain"));
+ assertEquals(text, types.forName("TEXT/PLAIN"));
try {
types.forName("invalid");
@@ -39,16 +49,61 @@
}
}
- public void addAlias() throws MimeTypeException {
- types.addAlias(types.forName("text/plain"), "foo/bar");
- assertNotNull(types.forName("foo/bar"));
- assertEquals("text/plain", types.forName("foo/bar").getName());
-
+ public void testAddAlias() throws MimeTypeException {
+ assertEquals(text, types.forName("text/x-plain"));
try {
- types.addAlias(types.forName("text/plain"), "invalid");
+ text.addAlias("invalid");
fail("MimeTypeException not thrown on invalid alias name");
} catch (MimeTypeException e) {
// expected
}
}
+
+ public void testSuperType() throws MimeTypeException {
+ assertNull(binary.getSuperType());
+ assertEquals(binary, text.getSuperType());
+ assertEquals(text, html.getSuperType());
+ }
+
+ public void testSubTypes() {
+ assertEquals(1, binary.getSubTypes().size());
+ assertEquals(
+ "text/plain",
+ binary.getSubTypes().iterator().next().getName());
+ assertEquals(1, text.getSubTypes().size());
+ assertEquals(
+ "text/html",
+ text.getSubTypes().iterator().next().getName());
+ assertEquals(0, html.getSubTypes().size());
+ }
+
+ public void testIsDescendantOf() {
+ assertFalse(binary.isDescendantOf(binary));
+ assertFalse(text.isDescendantOf(text));
+ assertFalse(html.isDescendantOf(html));
+
+ assertTrue(text.isDescendantOf(binary));
+ assertFalse(binary.isDescendantOf(text));
+
+ assertTrue(html.isDescendantOf(binary));
+ assertFalse(binary.isDescendantOf(html));
+
+ assertTrue(html.isDescendantOf(text));
+ assertFalse(text.isDescendantOf(html));
+ }
+
+ public void testCompareTo() {
+ assertTrue(binary.compareTo(binary) == 0);
+ assertTrue(binary.compareTo(text) < 0);
+ assertTrue(binary.compareTo(html) < 0);
+
+ assertTrue(text.compareTo(binary) > 0);
+ assertTrue(text.compareTo(text) == 0);
+ assertTrue(text.compareTo(html) < 0);
+
+ assertTrue(html.compareTo(binary) > 0);
+ assertTrue(html.compareTo(text) > 0);
+ assertTrue(html.compareTo(html) == 0);
+ }
+
}