You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ma...@apache.org on 2009/07/13 00:12:53 UTC
svn commit: r793417 - in /lucene/tika/trunk:
tika-core/src/main/java/org/apache/tika/mime/MediaType.java
tika-core/src/test/java/org/apache/tika/detect/TypeDetectorTest.java
tika-parsers/src/test/java/org/apache/tika/mime/MediaTypeTest.java
Author: mattmann
Date: Sun Jul 12 22:12:52 2009
New Revision: 793417
URL: http://svn.apache.org/viewvc?rev=793417&view=rev
Log:
- Fix for TIKA-121: MimeType.clean method no longer exists as a capability
Modified:
lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MediaType.java
lucene/tika/trunk/tika-core/src/test/java/org/apache/tika/detect/TypeDetectorTest.java
lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/MediaTypeTest.java
Modified: lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MediaType.java
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MediaType.java?rev=793417&r1=793416&r2=793417&view=diff
==============================================================================
--- lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MediaType.java (original)
+++ lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MediaType.java Sun Jul 12 22:12:52 2009
@@ -16,6 +16,7 @@
*/
package org.apache.tika.mime;
+import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
@@ -29,7 +30,7 @@
public final class MediaType {
private static final Map<String, String> NO_PARAMETERS =
- Collections.emptyMap();
+ new TreeMap<String, String>();
private static final Pattern SPECIAL =
Pattern.compile("[\\(\\)<>@,;:\\\\\"/\\[\\]\\?=]");
@@ -47,22 +48,37 @@
new MediaType("application", "xml", NO_PARAMETERS);
/**
- * Parses the given string to a media type. The string is expected
- * to be of the form "type/subtype(; parameter=...)*" as defined
- * in RFC 2045.
- * <p>
- * Note that currently this method only parses the "type/subtype" part
- * of the string. Any parameters are simply discarded. TODO: Change this.
- *
- * @param string media type string to be parsed
+ * Parses the given string to a media type. The string is expected to be of
+ * the form "type/subtype(; parameter=...)*" as defined in RFC 2045.
+ *
+ * @param string
+ * media type string to be parsed
* @return parsed media type, or <code>null</code> if parsing fails
*/
public static MediaType parse(String string) {
int colon = string.indexOf(';');
- if (colon != -1) {
- string = string.substring(0, colon);
- }
+ if (colon != -1 && colon != string.length()-1) {
+ String primarySubString = string.substring(0, colon);
+ String parameters = string
+ .substring(colon + 1, string.length());
+
+ MediaType type = parseNoParams(primarySubString);
+ String[] paramBases = parameters.split(";");
+ for (int i = 0; i < paramBases.length; i++) {
+ String[] paramToks = paramBases[i].split("=");
+ String paramName = paramToks[0].trim();
+ String paramValue = paramToks[1].trim();
+ type.parameters.put(paramName, paramValue);
+ }
+
+ return type;
+
+ } else
+ return parseNoParams(string);
+
+ }
+ private static MediaType parseNoParams(String string) {
int slash = string.indexOf('/');
if (slash != -1) {
String type = string.substring(0, slash).trim();
Modified: lucene/tika/trunk/tika-core/src/test/java/org/apache/tika/detect/TypeDetectorTest.java
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/tika-core/src/test/java/org/apache/tika/detect/TypeDetectorTest.java?rev=793417&r1=793416&r2=793417&view=diff
==============================================================================
--- lucene/tika/trunk/tika-core/src/test/java/org/apache/tika/detect/TypeDetectorTest.java (original)
+++ lucene/tika/trunk/tika-core/src/test/java/org/apache/tika/detect/TypeDetectorTest.java Sun Jul 12 22:12:52 2009
@@ -17,6 +17,8 @@
package org.apache.tika.detect;
import java.io.IOException;
+import java.util.Map;
+import java.util.TreeMap;
import junit.framework.TestCase;
@@ -29,13 +31,22 @@
public class TypeDetectorTest extends TestCase {
private Detector detector = new TypeDetector();
+
+ private static final Map<String, String> params = new
+ TreeMap<String, String>();
+ static{
+ params.put("a", "b");
+ }
+
+ private static final MediaType TEXT_PLAIN_A_EQ_B =
+ new MediaType("text", "plain", params);
public void testDetect() {
assertDetect(MediaType.TEXT_PLAIN, "text/plain");
assertDetect(MediaType.TEXT_PLAIN, "TEXT/PLAIN");
assertDetect(MediaType.TEXT_PLAIN, " text/\tplain\n");
- assertDetect(MediaType.TEXT_PLAIN, "text/plain; a=b");
- assertDetect(MediaType.TEXT_PLAIN, "\ttext/plain; a=b\n");
+ assertDetect(TEXT_PLAIN_A_EQ_B, "text/plain; a=b");
+ assertDetect(TEXT_PLAIN_A_EQ_B, "\ttext/plain; a=b\n");
assertDetect(MediaType.OCTET_STREAM, "text\\plain");
Modified: lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/MediaTypeTest.java
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/MediaTypeTest.java?rev=793417&r1=793416&r2=793417&view=diff
==============================================================================
--- lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/MediaTypeTest.java (original)
+++ lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/MediaTypeTest.java Sun Jul 12 22:12:52 2009
@@ -106,6 +106,55 @@
+ "; c=\"\\(\\)\\<\\>\\@\\,\\;\\:\\\\\\\"\\/\\[\\]\\?\\=\"",
new MediaType("text", "plain", parameters).toString());
}
+
+ /**
+ * @since TIKA-121
+ */
+ public void testParseWithParams() {
+ String mimeStringWithParams = "text/html;charset=UTF-8;foo=bar;foo2=bar2";
+
+ MediaType type = MediaType.parse(mimeStringWithParams);
+ assertNotNull(type);
+ assertNotNull(type.getParameters());
+ assertNotNull(type.getParameters().keySet());
+ assertEquals(3, type.getParameters().keySet().size());
+ boolean gotCharset = false, gotFoo = false, gotFoo2 = false;
+ for (String param : type.getParameters().keySet()) {
+ if (param.equals("charset")) {
+ gotCharset = true;
+ } else if (param.equals("foo")) {
+ gotFoo = true;
+ } else if (param.equals("foo2")) {
+ gotFoo2 = true;
+ }
+ }
+ assertTrue(gotCharset && gotFoo && gotFoo2);
+ }
+
+ /**
+ * @since TIKA-121
+ */
+ public void testParseNoParams() {
+ String mimeStringNoParams = "text/html";
+
+ MediaType type = MediaType.parse(mimeStringNoParams);
+ assertNotNull(type);
+ assertNotNull(type.getParameters());
+ assertNotNull(type.getParameters().keySet());
+ assertEquals(0, type.getParameters().keySet().size());
+ }
+
+ /**
+ * @since TIKA-121
+ */
+ public void testParseNoParamsWithSemi() {
+ String mimeStringNoParamsWithSemi = "text/html;";
+ MediaType type = MediaType.parse(mimeStringNoParamsWithSemi);
+ assertNotNull(type);
+ assertNotNull(type.getParameters());
+ assertNotNull(type.getParameters().keySet());
+ assertEquals(0, type.getParameters().keySet().size());
+ }
}