You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ju...@apache.org on 2010/04/28 15:05:14 UTC
svn commit: r938976 - in /lucene/tika/trunk:
tika-core/src/main/java/org/apache/tika/config/
tika-core/src/main/java/org/apache/tika/mime/
tika-core/src/main/java/org/apache/tika/utils/
tika-parsers/src/test/java/org/apache/tika/ tika-parsers/src/test/java/org/apache/tika/mime/
Author: jukka
Date: Wed Apr 28 13:05:14 2010
New Revision: 938976
URL: http://svn.apache.org/viewvc?rev=938976&view=rev
Log:
TIKA-89: Rename MimeType and MimeTypes
Make MimeType use MediaType internally. The plan is to ultimately get rid of the MimeType class.
Modified:
lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java
lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MediaType.java
lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MimeType.java
lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MimeTypes.java
lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MimeTypesReader.java
lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/utils/ParseUtils.java
lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/TestParsers.java
lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/MimeTypeTest.java
lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/MimeTypesTest.java
lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/TestMimeTypes.java
Modified: lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java?rev=938976&r1=938975&r2=938976&view=diff
==============================================================================
--- lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java (original)
+++ lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java Wed Apr 28 13:05:14 2010
@@ -191,7 +191,7 @@ public class TikaConfig {
* @param mimeType MIME type
* @return configured Parser instance, or <code>null</code>
*/
- public Parser getParser(String mimeType) {
+ public Parser getParser(MediaType mimeType) {
return parsers.get(mimeType);
}
Modified: lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MediaType.java
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MediaType.java?rev=938976&r1=938975&r2=938976&view=diff
==============================================================================
--- lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MediaType.java (original)
+++ lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MediaType.java Wed Apr 28 13:05:14 2010
@@ -1,4 +1,4 @@
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
@@ -28,7 +28,7 @@ import java.util.regex.Pattern;
/**
* Internet media type.
*/
-public final class MediaType {
+public final class MediaType implements Comparable<MediaType> {
private static final Map<String, String> NO_PARAMETERS =
new TreeMap<String, String>();
@@ -254,4 +254,8 @@ public final class MediaType {
return hash;
}
+ public int compareTo(MediaType that) {
+ return toString().compareTo(that.toString());
+ }
+
}
Modified: lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MimeType.java
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MimeType.java?rev=938976&r1=938975&r2=938976&view=diff
==============================================================================
--- lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MimeType.java (original)
+++ lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MimeType.java Wed Apr 28 13:05:14 2010
@@ -1,4 +1,4 @@
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
@@ -67,9 +67,9 @@ public final class MimeType implements C
private final MimeTypes registry;
/**
- * Lower case name of this media type.
+ * The normalized media type name.
*/
- private final String name;
+ private final MediaType type;
/**
* Description of this media type.
@@ -99,17 +99,26 @@ public final class MimeType implements C
* up to date.
*
* @param registry the media type registry that contains this type
- * @param name media type name
+ * @param type normalized media type name
*/
- MimeType(MimeTypes registry, String name) {
+ MimeType(MimeTypes registry, MediaType type) {
if (registry == null) {
throw new IllegalArgumentException("Registry is missing");
}
- if (!MimeType.isValid(name) || !name.equals(name.toLowerCase())) {
- throw new IllegalArgumentException("Media type name is invalid");
+ if (type == null) {
+ throw new IllegalArgumentException("Media type name is missing");
}
this.registry = registry;
- this.name = name;
+ this.type = type;
+ }
+
+ /**
+ * Returns the normalized media type name.
+ *
+ * @return media type
+ */
+ public MediaType getType() {
+ return type;
}
/**
@@ -118,7 +127,7 @@ public final class MimeType implements C
* @return media type name (lower case)
*/
public String getName() {
- return name;
+ return type.toString();
}
/**
@@ -197,14 +206,9 @@ public final class MimeType implements C
* @throws MimeTypeException if the alias is invalid or
* already registered for another media type
*/
- public void addAlias(String alias) throws MimeTypeException {
- if (isValid(alias)) {
- alias = alias.toLowerCase();
- if (!name.equals(alias)) {
- registry.addAlias(this, alias);
- }
- } else {
- throw new MimeTypeException("Invalid media type alias: " + alias);
+ public void addAlias(MediaType alias) throws MimeTypeException {
+ if (!alias.isSpecializationOf(type)) {
+ registry.addAlias(this, alias);
}
}
@@ -347,22 +351,21 @@ public final class MimeType implements C
//----------------------------------------------------------< Comparable >
- public int compareTo(MimeType type) {
- if (type == null) {
+ public int compareTo(MimeType mime) {
+ if (mime == null) {
throw new IllegalArgumentException("MimeType is missing");
- }
- if (type == this) {
+ } else if (mime == this) {
return 0;
- } else if (this.isDescendantOf(type)) {
+ } else if (this.isDescendantOf(mime)) {
return 1;
- } else if (type.isDescendantOf(this)) {
+ } else if (mime.isDescendantOf(this)) {
return -1;
} else if (superType != null) {
- return superType.compareTo(type);
- } else if (type.superType != null) {
- return compareTo(type.superType);
+ return superType.compareTo(mime);
+ } else if (mime.superType != null) {
+ return compareTo(mime.superType);
} else {
- return name.compareTo(type.name);
+ return type.compareTo(mime.type);
}
}
@@ -374,7 +377,7 @@ public final class MimeType implements C
* @return media type name
*/
public String toString() {
- return name;
+ return type.toString();
}
}
Modified: lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MimeTypes.java
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MimeTypes.java?rev=938976&r1=938975&r2=938976&view=diff
==============================================================================
--- lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MimeTypes.java (original)
+++ lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MimeTypes.java Wed Apr 28 13:05:14 2010
@@ -119,7 +119,8 @@ public final class MimeTypes implements
private final MimeType xmlMimeType;
/** All the registered MimeTypes indexed on their name */
- private final Map<String, MimeType> types = new HashMap<String, MimeType>();
+ private final Map<MediaType, MimeType> types =
+ new HashMap<MediaType, MimeType>();
/** The patterns matcher */
private Patterns patterns = new Patterns();
@@ -133,9 +134,9 @@ public final class MimeTypes implements
private final XmlRootExtractor xmlRootExtractor;
public MimeTypes() {
- rootMimeType = new MimeType(this, OCTET_STREAM);
- textMimeType = new MimeType(this, PLAIN_TEXT);
- xmlMimeType = new MimeType(this, XML);
+ rootMimeType = new MimeType(this, MediaType.OCTET_STREAM);
+ textMimeType = new MimeType(this, MediaType.TEXT_PLAIN);
+ xmlMimeType = new MimeType(this, MediaType.APPLICATION_XML);
try {
textMimeType.setSuperType(rootMimeType);
@@ -144,9 +145,9 @@ public final class MimeTypes implements
throw new IllegalStateException("Error in MimeType logic", e);
}
- types.put(rootMimeType.getName(), rootMimeType);
- types.put(textMimeType.getName(), textMimeType);
- types.put(xmlMimeType.getName(), xmlMimeType);
+ types.put(rootMimeType.getType(), rootMimeType);
+ types.put(textMimeType.getType(), textMimeType);
+ types.put(xmlMimeType.getType(), xmlMimeType);
try {
xmlRootExtractor = new XmlRootExtractor();
@@ -397,21 +398,21 @@ public final class MimeTypes implements
*/
public synchronized MimeType forName(String name)
throws MimeTypeException {
- if (MimeType.isValid(name)) {
- name = name.toLowerCase(Locale.ENGLISH);
- MimeType type = types.get(name);
- if (type == null) {
- type = new MimeType(this, name);
- if (name.startsWith("text/")) {
- type.setSuperType(textMimeType);
- } else if (name.endsWith("+xml")) {
- type.setSuperType(xmlMimeType);
+ MediaType type = MediaType.parse(name);
+ if (type != null) {
+ MimeType mime = types.get(type);
+ if (mime == null) {
+ mime = new MimeType(this, type);
+ if ("text".equals(type.getType())) {
+ mime.setSuperType(textMimeType);
+ } else if (type.getSubtype().endsWith("+xml")) {
+ mime.setSuperType(xmlMimeType);
} else {
- type.setSuperType(rootMimeType);
+ mime.setSuperType(rootMimeType);
}
- types.put(name, type);
+ types.put(type, mime);
}
- return type;
+ return mime;
} else {
throw new MimeTypeException("Invalid media type name: " + name);
}
@@ -425,7 +426,7 @@ public final class MimeTypes implements
* @param alias media type alias (normalized to lower case)
* @throws MimeTypeException if the alias already exists
*/
- synchronized void addAlias(MimeType type, String alias)
+ synchronized void addAlias(MimeType type, MediaType alias)
throws MimeTypeException {
if (!types.containsKey(alias)) {
types.put(alias, type);
Modified: lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MimeTypesReader.java
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MimeTypesReader.java?rev=938976&r1=938975&r2=938976&view=diff
==============================================================================
--- lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MimeTypesReader.java (original)
+++ lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MimeTypesReader.java Wed Apr 28 13:05:14 2010
@@ -156,7 +156,13 @@ final class MimeTypesReader implements M
readMagic(nodeElement, type);
} else if (nodeElement.getTagName().equals(ALIAS_TAG)) {
String alias = nodeElement.getAttribute(ALIAS_TYPE_ATTR);
- type.addAlias(alias);
+ MediaType aliasType = MediaType.parse(alias);
+ if (aliasType != null) {
+ type.addAlias(aliasType);
+ } else {
+ throw new MimeTypeException(
+ "Invalid media type alias: " + alias);
+ }
} else if (nodeElement.getTagName().equals(ROOT_XML_TAG)) {
readRootXML(nodeElement, type);
} else if (nodeElement.getTagName().equals(SUB_CLASS_OF_TAG)) {
Modified: lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/utils/ParseUtils.java
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/utils/ParseUtils.java?rev=938976&r1=938975&r2=938976&view=diff
==============================================================================
--- lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/utils/ParseUtils.java (original)
+++ lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/utils/ParseUtils.java Wed Apr 28 13:05:14 2010
@@ -28,6 +28,7 @@ import org.apache.tika.config.TikaConfig
import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.TikaMimeKeys;
+import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.Parser;
import org.apache.tika.sax.BodyContentHandler;
import org.xml.sax.ContentHandler;
@@ -52,7 +53,7 @@ public class ParseUtils implements TikaM
*/
public static Parser getParser(String mimeType, TikaConfig config)
throws TikaException {
- return config.getParser(mimeType);
+ return config.getParser(MediaType.parse(mimeType));
}
/**
@@ -106,7 +107,7 @@ public class ParseUtils implements TikaM
InputStream stream, TikaConfig config, String mimeType)
throws TikaException, IOException {
try {
- Parser parser = config.getParser(mimeType);
+ Parser parser = config.getParser(MediaType.parse(mimeType));
ContentHandler handler = new BodyContentHandler();
parser.parse(stream, handler, new Metadata());
return handler.toString();
Modified: lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/TestParsers.java
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/TestParsers.java?rev=938976&r1=938975&r2=938976&view=diff
==============================================================================
--- lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/TestParsers.java (original)
+++ lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/TestParsers.java Wed Apr 28 13:05:14 2010
@@ -26,6 +26,7 @@ import junit.framework.TestCase;
import org.apache.tika.config.TikaConfig;
import org.apache.tika.metadata.Metadata;
+import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.Parser;
import org.apache.tika.utils.ParseUtils;
import org.xml.sax.helpers.DefaultHandler;
@@ -78,7 +79,8 @@ public class TestParsers extends TestCas
String s2 = ParseUtils.getStringContent(file, tc,
"application/vnd.ms-powerpoint");
assertEquals(s1, s2);
- Parser parser = tc.getParser("application/vnd.ms-powerpoint");
+ Parser parser =
+ tc.getParser(MediaType.parse("application/vnd.ms-powerpoint"));
Metadata metadata = new Metadata();
InputStream stream = new FileInputStream(file);
try {
@@ -94,7 +96,7 @@ public class TestParsers extends TestCas
String s1 = ParseUtils.getStringContent(file, tc);
String s2 = ParseUtils.getStringContent(file, tc, "application/msword");
assertEquals(s1, s2);
- Parser parser = tc.getParser("application/msword");
+ Parser parser = tc.getParser(MediaType.parse("application/msword"));
Metadata metadata = new Metadata();
InputStream stream = new FileInputStream(file);
try {
@@ -114,7 +116,8 @@ public class TestParsers extends TestCas
assertEquals(s1, s2);
assertTrue("Text does not contain '" + expected + "'", s1
.contains(expected));
- Parser parser = tc.getParser("application/vnd.ms-excel");
+ Parser parser =
+ tc.getParser(MediaType.parse("application/vnd.ms-excel"));
Metadata metadata = new Metadata();
InputStream stream = new FileInputStream(file);
try {
@@ -147,7 +150,7 @@ public class TestParsers extends TestCas
String s2 = ParseUtils.getStringContent(file, tc, "text/html");
assertEquals(s1, s2);
- Parser parser = tc.getParser("text/html");
+ Parser parser = tc.getParser(MediaType.parse("text/html"));
assertNotNull(parser);
}
@@ -157,7 +160,7 @@ public class TestParsers extends TestCas
String s2 = ParseUtils.getStringContent(file, tc, "application/zip");
assertEquals(s1, s2);
- Parser parser = tc.getParser("application/zip");
+ Parser parser = tc.getParser(MediaType.parse("application/zip"));
assertNotNull(parser);
}
@@ -167,7 +170,7 @@ public class TestParsers extends TestCas
String s2 = ParseUtils.getStringContent(file, tc, "audio/mpeg");
assertEquals(s1, s2);
- Parser parser = tc.getParser("audio/mpeg");
+ Parser parser = tc.getParser(MediaType.parse("audio/mpeg"));
assertNotNull(parser);
}
Modified: lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/MimeTypeTest.java
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/MimeTypeTest.java?rev=938976&r1=938975&r2=938976&view=diff
==============================================================================
--- lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/MimeTypeTest.java (original)
+++ lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/MimeTypeTest.java Wed Apr 28 13:05:14 2010
@@ -33,7 +33,7 @@ public class MimeTypeTest extends TestCa
// Missing registry
try {
- new MimeType(null, "text/plain");
+ new MimeType(null, MediaType.TEXT_PLAIN);
fail("Expected IllegalArgumentException");
} catch (IllegalArgumentException e) {
// expected result
@@ -46,22 +46,6 @@ public class MimeTypeTest extends TestCa
} catch (IllegalArgumentException e) {
// expected result
}
-
- // Invalid name (no slash)
- try {
- new MimeType(types, "application");
- fail("Expected IllegalArgumentException");
- } catch (IllegalArgumentException e) {
- // expected result
- }
-
- // Invalid name (not lower case)
- try {
- new MimeType(types, "TEXT/PLAIN");
- fail("Expected IllegalArgumentException");
- } catch (IllegalArgumentException e) {
- // expected result
- }
}
public void testIsValidName() {
Modified: lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/MimeTypesTest.java
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/MimeTypesTest.java?rev=938976&r1=938975&r2=938976&view=diff
==============================================================================
--- lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/MimeTypesTest.java (original)
+++ lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/MimeTypesTest.java Wed Apr 28 13:05:14 2010
@@ -35,7 +35,7 @@ public class MimeTypesTest extends TestC
types = new MimeTypes();
binary = types.forName("application/octet-stream");
text = types.forName("text/plain");
- text.addAlias("text/x-plain");
+ text.addAlias(MediaType.parse("text/x-plain"));
html = types.forName("text/html");
html.setSuperType(text);
}
@@ -52,16 +52,6 @@ public class MimeTypesTest extends TestC
}
}
- public void testAddAlias() throws MimeTypeException {
- assertEquals(text, types.forName("text/x-plain"));
- try {
- text.addAlias("invalid");
- fail("MimeTypeException not thrown on invalid alias name");
- } catch (MimeTypeException e) {
- // expected
- }
- }
-
public void testSuperType() throws MimeTypeException {
assertNull(binary.getSuperType());
assertEquals(binary, text.getSuperType());
Modified: lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/TestMimeTypes.java
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/TestMimeTypes.java?rev=938976&r1=938975&r2=938976&view=diff
==============================================================================
--- lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/TestMimeTypes.java (original)
+++ lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/TestMimeTypes.java Wed Apr 28 13:05:14 2010
@@ -250,16 +250,16 @@ public class TestMimeTypes extends TestC
* @since TIKA-194
*/
public void testJavaRegex() throws Exception{
- MimeType testType = new MimeType(this.repo, "foo/bar");
+ MimeType testType = new MimeType(this.repo, MediaType.parse("foo/bar"));
this.repo.add(testType);
assertNotNull(repo.forName("foo/bar"));
String pattern = "rtg_sst_grb_0\\.5\\.\\d{8}";
this.repo.addPattern(testType, pattern, true);
String testFileName = "rtg_sst_grb_0.5.12345678";
assertNotNull(this.repo.getMimeType(testFileName));
- assertEquals(this.repo.getMimeType(testFileName).getName(), "foo/bar");
+ assertEquals(this.repo.getMimeType(testFileName).getName(), "foo/bar");
- MimeType testType2 = new MimeType(this.repo, "foo/bar2");
+ MimeType testType2 = new MimeType(this.repo, MediaType.parse("foo/bar2"));
this.repo.add(testType2);
assertNotNull(repo.forName("foo/bar2"));
this.repo.addPattern(testType2, pattern, false);
Re: svn commit: r938976 - in /lucene/tika/trunk:
tika-core/src/main/java/org/apache/tika/config/
tika-core/src/main/java/org/apache/tika/mime/
tika-core/src/main/java/org/apache/tika/utils/
tika-parsers/src/test/java/org/apache/tika/ tika-parsers/src/test/java/org/apache/tika/mime/
Posted by "Mattmann, Chris A (388J)" <ch...@jpl.nasa.gov>.
+1, I've found it confusing and I like MediaType better!
On 4/28/10 6:05 AM, "jukka@apache.org" <ju...@apache.org> wrote:
Author: jukka
Date: Wed Apr 28 13:05:14 2010
New Revision: 938976
URL: http://svn.apache.org/viewvc?rev=938976&view=rev
Log:
TIKA-89: Rename MimeType and MimeTypes
Make MimeType use MediaType internally. The plan is to ultimately get rid of the MimeType class.
Modified:
lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java
lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MediaType.java
lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MimeType.java
lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MimeTypes.java
lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MimeTypesReader.java
lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/utils/ParseUtils.java
lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/TestParsers.java
lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/MimeTypeTest.java
lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/MimeTypesTest.java
lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/TestMimeTypes.java
Modified: lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java?rev=938976&r1=938975&r2=938976&view=diff
==============================================================================
--- lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java (original)
+++ lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java Wed Apr 28 13:05:14 2010
@@ -191,7 +191,7 @@ public class TikaConfig {
* @param mimeType MIME type
* @return configured Parser instance, or <code>null</code>
*/
- public Parser getParser(String mimeType) {
+ public Parser getParser(MediaType mimeType) {
return parsers.get(mimeType);
}
Modified: lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MediaType.java
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MediaType.java?rev=938976&r1=938975&r2=938976&view=diff
==============================================================================
--- lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MediaType.java (original)
+++ lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MediaType.java Wed Apr 28 13:05:14 2010
@@ -1,4 +1,4 @@
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
@@ -28,7 +28,7 @@ import java.util.regex.Pattern;
/**
* Internet media type.
*/
-public final class MediaType {
+public final class MediaType implements Comparable<MediaType> {
private static final Map<String, String> NO_PARAMETERS =
new TreeMap<String, String>();
@@ -254,4 +254,8 @@ public final class MediaType {
return hash;
}
+ public int compareTo(MediaType that) {
+ return toString().compareTo(that.toString());
+ }
+
}
Modified: lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MimeType.java
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MimeType.java?rev=938976&r1=938975&r2=938976&view=diff
==============================================================================
--- lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MimeType.java (original)
+++ lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MimeType.java Wed Apr 28 13:05:14 2010
@@ -1,4 +1,4 @@
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
@@ -67,9 +67,9 @@ public final class MimeType implements C
private final MimeTypes registry;
/**
- * Lower case name of this media type.
+ * The normalized media type name.
*/
- private final String name;
+ private final MediaType type;
/**
* Description of this media type.
@@ -99,17 +99,26 @@ public final class MimeType implements C
* up to date.
*
* @param registry the media type registry that contains this type
- * @param name media type name
+ * @param type normalized media type name
*/
- MimeType(MimeTypes registry, String name) {
+ MimeType(MimeTypes registry, MediaType type) {
if (registry == null) {
throw new IllegalArgumentException("Registry is missing");
}
- if (!MimeType.isValid(name) || !name.equals(name.toLowerCase())) {
- throw new IllegalArgumentException("Media type name is invalid");
+ if (type == null) {
+ throw new IllegalArgumentException("Media type name is missing");
}
this.registry = registry;
- this.name = name;
+ this.type = type;
+ }
+
+ /**
+ * Returns the normalized media type name.
+ *
+ * @return media type
+ */
+ public MediaType getType() {
+ return type;
}
/**
@@ -118,7 +127,7 @@ public final class MimeType implements C
* @return media type name (lower case)
*/
public String getName() {
- return name;
+ return type.toString();
}
/**
@@ -197,14 +206,9 @@ public final class MimeType implements C
* @throws MimeTypeException if the alias is invalid or
* already registered for another media type
*/
- public void addAlias(String alias) throws MimeTypeException {
- if (isValid(alias)) {
- alias = alias.toLowerCase();
- if (!name.equals(alias)) {
- registry.addAlias(this, alias);
- }
- } else {
- throw new MimeTypeException("Invalid media type alias: " + alias);
+ public void addAlias(MediaType alias) throws MimeTypeException {
+ if (!alias.isSpecializationOf(type)) {
+ registry.addAlias(this, alias);
}
}
@@ -347,22 +351,21 @@ public final class MimeType implements C
//----------------------------------------------------------< Comparable >
- public int compareTo(MimeType type) {
- if (type == null) {
+ public int compareTo(MimeType mime) {
+ if (mime == null) {
throw new IllegalArgumentException("MimeType is missing");
- }
- if (type == this) {
+ } else if (mime == this) {
return 0;
- } else if (this.isDescendantOf(type)) {
+ } else if (this.isDescendantOf(mime)) {
return 1;
- } else if (type.isDescendantOf(this)) {
+ } else if (mime.isDescendantOf(this)) {
return -1;
} else if (superType != null) {
- return superType.compareTo(type);
- } else if (type.superType != null) {
- return compareTo(type.superType);
+ return superType.compareTo(mime);
+ } else if (mime.superType != null) {
+ return compareTo(mime.superType);
} else {
- return name.compareTo(type.name);
+ return type.compareTo(mime.type);
}
}
@@ -374,7 +377,7 @@ public final class MimeType implements C
* @return media type name
*/
public String toString() {
- return name;
+ return type.toString();
}
}
Modified: lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MimeTypes.java
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MimeTypes.java?rev=938976&r1=938975&r2=938976&view=diff
==============================================================================
--- lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MimeTypes.java (original)
+++ lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MimeTypes.java Wed Apr 28 13:05:14 2010
@@ -119,7 +119,8 @@ public final class MimeTypes implements
private final MimeType xmlMimeType;
/** All the registered MimeTypes indexed on their name */
- private final Map<String, MimeType> types = new HashMap<String, MimeType>();
+ private final Map<MediaType, MimeType> types =
+ new HashMap<MediaType, MimeType>();
/** The patterns matcher */
private Patterns patterns = new Patterns();
@@ -133,9 +134,9 @@ public final class MimeTypes implements
private final XmlRootExtractor xmlRootExtractor;
public MimeTypes() {
- rootMimeType = new MimeType(this, OCTET_STREAM);
- textMimeType = new MimeType(this, PLAIN_TEXT);
- xmlMimeType = new MimeType(this, XML);
+ rootMimeType = new MimeType(this, MediaType.OCTET_STREAM);
+ textMimeType = new MimeType(this, MediaType.TEXT_PLAIN);
+ xmlMimeType = new MimeType(this, MediaType.APPLICATION_XML);
try {
textMimeType.setSuperType(rootMimeType);
@@ -144,9 +145,9 @@ public final class MimeTypes implements
throw new IllegalStateException("Error in MimeType logic", e);
}
- types.put(rootMimeType.getName(), rootMimeType);
- types.put(textMimeType.getName(), textMimeType);
- types.put(xmlMimeType.getName(), xmlMimeType);
+ types.put(rootMimeType.getType(), rootMimeType);
+ types.put(textMimeType.getType(), textMimeType);
+ types.put(xmlMimeType.getType(), xmlMimeType);
try {
xmlRootExtractor = new XmlRootExtractor();
@@ -397,21 +398,21 @@ public final class MimeTypes implements
*/
public synchronized MimeType forName(String name)
throws MimeTypeException {
- if (MimeType.isValid(name)) {
- name = name.toLowerCase(Locale.ENGLISH);
- MimeType type = types.get(name);
- if (type == null) {
- type = new MimeType(this, name);
- if (name.startsWith("text/")) {
- type.setSuperType(textMimeType);
- } else if (name.endsWith("+xml")) {
- type.setSuperType(xmlMimeType);
+ MediaType type = MediaType.parse(name);
+ if (type != null) {
+ MimeType mime = types.get(type);
+ if (mime == null) {
+ mime = new MimeType(this, type);
+ if ("text".equals(type.getType())) {
+ mime.setSuperType(textMimeType);
+ } else if (type.getSubtype().endsWith("+xml")) {
+ mime.setSuperType(xmlMimeType);
} else {
- type.setSuperType(rootMimeType);
+ mime.setSuperType(rootMimeType);
}
- types.put(name, type);
+ types.put(type, mime);
}
- return type;
+ return mime;
} else {
throw new MimeTypeException("Invalid media type name: " + name);
}
@@ -425,7 +426,7 @@ public final class MimeTypes implements
* @param alias media type alias (normalized to lower case)
* @throws MimeTypeException if the alias already exists
*/
- synchronized void addAlias(MimeType type, String alias)
+ synchronized void addAlias(MimeType type, MediaType alias)
throws MimeTypeException {
if (!types.containsKey(alias)) {
types.put(alias, type);
Modified: lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MimeTypesReader.java
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MimeTypesReader.java?rev=938976&r1=938975&r2=938976&view=diff
==============================================================================
--- lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MimeTypesReader.java (original)
+++ lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MimeTypesReader.java Wed Apr 28 13:05:14 2010
@@ -156,7 +156,13 @@ final class MimeTypesReader implements M
readMagic(nodeElement, type);
} else if (nodeElement.getTagName().equals(ALIAS_TAG)) {
String alias = nodeElement.getAttribute(ALIAS_TYPE_ATTR);
- type.addAlias(alias);
+ MediaType aliasType = MediaType.parse(alias);
+ if (aliasType != null) {
+ type.addAlias(aliasType);
+ } else {
+ throw new MimeTypeException(
+ "Invalid media type alias: " + alias);
+ }
} else if (nodeElement.getTagName().equals(ROOT_XML_TAG)) {
readRootXML(nodeElement, type);
} else if (nodeElement.getTagName().equals(SUB_CLASS_OF_TAG)) {
Modified: lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/utils/ParseUtils.java
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/utils/ParseUtils.java?rev=938976&r1=938975&r2=938976&view=diff
==============================================================================
--- lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/utils/ParseUtils.java (original)
+++ lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/utils/ParseUtils.java Wed Apr 28 13:05:14 2010
@@ -28,6 +28,7 @@ import org.apache.tika.config.TikaConfig
import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.TikaMimeKeys;
+import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.Parser;
import org.apache.tika.sax.BodyContentHandler;
import org.xml.sax.ContentHandler;
@@ -52,7 +53,7 @@ public class ParseUtils implements TikaM
*/
public static Parser getParser(String mimeType, TikaConfig config)
throws TikaException {
- return config.getParser(mimeType);
+ return config.getParser(MediaType.parse(mimeType));
}
/**
@@ -106,7 +107,7 @@ public class ParseUtils implements TikaM
InputStream stream, TikaConfig config, String mimeType)
throws TikaException, IOException {
try {
- Parser parser = config.getParser(mimeType);
+ Parser parser = config.getParser(MediaType.parse(mimeType));
ContentHandler handler = new BodyContentHandler();
parser.parse(stream, handler, new Metadata());
return handler.toString();
Modified: lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/TestParsers.java
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/TestParsers.java?rev=938976&r1=938975&r2=938976&view=diff
==============================================================================
--- lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/TestParsers.java (original)
+++ lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/TestParsers.java Wed Apr 28 13:05:14 2010
@@ -26,6 +26,7 @@ import junit.framework.TestCase;
import org.apache.tika.config.TikaConfig;
import org.apache.tika.metadata.Metadata;
+import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.Parser;
import org.apache.tika.utils.ParseUtils;
import org.xml.sax.helpers.DefaultHandler;
@@ -78,7 +79,8 @@ public class TestParsers extends TestCas
String s2 = ParseUtils.getStringContent(file, tc,
"application/vnd.ms-powerpoint");
assertEquals(s1, s2);
- Parser parser = tc.getParser("application/vnd.ms-powerpoint");
+ Parser parser =
+ tc.getParser(MediaType.parse("application/vnd.ms-powerpoint"));
Metadata metadata = new Metadata();
InputStream stream = new FileInputStream(file);
try {
@@ -94,7 +96,7 @@ public class TestParsers extends TestCas
String s1 = ParseUtils.getStringContent(file, tc);
String s2 = ParseUtils.getStringContent(file, tc, "application/msword");
assertEquals(s1, s2);
- Parser parser = tc.getParser("application/msword");
+ Parser parser = tc.getParser(MediaType.parse("application/msword"));
Metadata metadata = new Metadata();
InputStream stream = new FileInputStream(file);
try {
@@ -114,7 +116,8 @@ public class TestParsers extends TestCas
assertEquals(s1, s2);
assertTrue("Text does not contain '" + expected + "'", s1
.contains(expected));
- Parser parser = tc.getParser("application/vnd.ms-excel");
+ Parser parser =
+ tc.getParser(MediaType.parse("application/vnd.ms-excel"));
Metadata metadata = new Metadata();
InputStream stream = new FileInputStream(file);
try {
@@ -147,7 +150,7 @@ public class TestParsers extends TestCas
String s2 = ParseUtils.getStringContent(file, tc, "text/html");
assertEquals(s1, s2);
- Parser parser = tc.getParser("text/html");
+ Parser parser = tc.getParser(MediaType.parse("text/html"));
assertNotNull(parser);
}
@@ -157,7 +160,7 @@ public class TestParsers extends TestCas
String s2 = ParseUtils.getStringContent(file, tc, "application/zip");
assertEquals(s1, s2);
- Parser parser = tc.getParser("application/zip");
+ Parser parser = tc.getParser(MediaType.parse("application/zip"));
assertNotNull(parser);
}
@@ -167,7 +170,7 @@ public class TestParsers extends TestCas
String s2 = ParseUtils.getStringContent(file, tc, "audio/mpeg");
assertEquals(s1, s2);
- Parser parser = tc.getParser("audio/mpeg");
+ Parser parser = tc.getParser(MediaType.parse("audio/mpeg"));
assertNotNull(parser);
}
Modified: lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/MimeTypeTest.java
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/MimeTypeTest.java?rev=938976&r1=938975&r2=938976&view=diff
==============================================================================
--- lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/MimeTypeTest.java (original)
+++ lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/MimeTypeTest.java Wed Apr 28 13:05:14 2010
@@ -33,7 +33,7 @@ public class MimeTypeTest extends TestCa
// Missing registry
try {
- new MimeType(null, "text/plain");
+ new MimeType(null, MediaType.TEXT_PLAIN);
fail("Expected IllegalArgumentException");
} catch (IllegalArgumentException e) {
// expected result
@@ -46,22 +46,6 @@ public class MimeTypeTest extends TestCa
} catch (IllegalArgumentException e) {
// expected result
}
-
- // Invalid name (no slash)
- try {
- new MimeType(types, "application");
- fail("Expected IllegalArgumentException");
- } catch (IllegalArgumentException e) {
- // expected result
- }
-
- // Invalid name (not lower case)
- try {
- new MimeType(types, "TEXT/PLAIN");
- fail("Expected IllegalArgumentException");
- } catch (IllegalArgumentException e) {
- // expected result
- }
}
public void testIsValidName() {
Modified: lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/MimeTypesTest.java
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/MimeTypesTest.java?rev=938976&r1=938975&r2=938976&view=diff
==============================================================================
--- lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/MimeTypesTest.java (original)
+++ lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/MimeTypesTest.java Wed Apr 28 13:05:14 2010
@@ -35,7 +35,7 @@ public class MimeTypesTest extends TestC
types = new MimeTypes();
binary = types.forName("application/octet-stream");
text = types.forName("text/plain");
- text.addAlias("text/x-plain");
+ text.addAlias(MediaType.parse("text/x-plain"));
html = types.forName("text/html");
html.setSuperType(text);
}
@@ -52,16 +52,6 @@ public class MimeTypesTest extends TestC
}
}
- public void testAddAlias() throws MimeTypeException {
- assertEquals(text, types.forName("text/x-plain"));
- try {
- text.addAlias("invalid");
- fail("MimeTypeException not thrown on invalid alias name");
- } catch (MimeTypeException e) {
- // expected
- }
- }
-
public void testSuperType() throws MimeTypeException {
assertNull(binary.getSuperType());
assertEquals(binary, text.getSuperType());
Modified: lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/TestMimeTypes.java
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/TestMimeTypes.java?rev=938976&r1=938975&r2=938976&view=diff
==============================================================================
--- lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/TestMimeTypes.java (original)
+++ lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/TestMimeTypes.java Wed Apr 28 13:05:14 2010
@@ -250,16 +250,16 @@ public class TestMimeTypes extends TestC
* @since TIKA-194
*/
public void testJavaRegex() throws Exception{
- MimeType testType = new MimeType(this.repo, "foo/bar");
+ MimeType testType = new MimeType(this.repo, MediaType.parse("foo/bar"));
this.repo.add(testType);
assertNotNull(repo.forName("foo/bar"));
String pattern = "rtg_sst_grb_0\\.5\\.\\d{8}";
this.repo.addPattern(testType, pattern, true);
String testFileName = "rtg_sst_grb_0.5.12345678";
assertNotNull(this.repo.getMimeType(testFileName));
- assertEquals(this.repo.getMimeType(testFileName).getName(), "foo/bar");
+ assertEquals(this.repo.getMimeType(testFileName).getName(), "foo/bar");
- MimeType testType2 = new MimeType(this.repo, "foo/bar2");
+ MimeType testType2 = new MimeType(this.repo, MediaType.parse("foo/bar2"));
this.repo.add(testType2);
assertNotNull(repo.forName("foo/bar2"));
this.repo.addPattern(testType2, pattern, false);
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Chris Mattmann, Ph.D.
Senior Computer Scientist
NASA Jet Propulsion Laboratory Pasadena, CA 91109 USA
Office: 171-266B, Mailstop: 171-246
Email: Chris.Mattmann@jpl.nasa.gov
WWW: http://sunset.usc.edu/~mattmann/
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Adjunct Assistant Professor, Computer Science Department
University of Southern California, Los Angeles, CA 90089 USA
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++