You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ma...@apache.org on 2009/07/13 00:12:53 UTC

svn commit: r793417 - in /lucene/tika/trunk: tika-core/src/main/java/org/apache/tika/mime/MediaType.java tika-core/src/test/java/org/apache/tika/detect/TypeDetectorTest.java tika-parsers/src/test/java/org/apache/tika/mime/MediaTypeTest.java

Author: mattmann
Date: Sun Jul 12 22:12:52 2009
New Revision: 793417

URL: http://svn.apache.org/viewvc?rev=793417&view=rev
Log:
- Fix for TIKA-121: MimeType.clean method no longer exists as a capability

Modified:
    lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MediaType.java
    lucene/tika/trunk/tika-core/src/test/java/org/apache/tika/detect/TypeDetectorTest.java
    lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/MediaTypeTest.java

Modified: lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MediaType.java
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MediaType.java?rev=793417&r1=793416&r2=793417&view=diff
==============================================================================
--- lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MediaType.java (original)
+++ lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MediaType.java Sun Jul 12 22:12:52 2009
@@ -16,6 +16,7 @@
  */
 package org.apache.tika.mime;
 
+import java.util.Arrays;
 import java.util.Collections;
 import java.util.HashMap;
 import java.util.Map;
@@ -29,7 +30,7 @@
 public final class MediaType {
 
     private static final Map<String, String> NO_PARAMETERS =
-        Collections.emptyMap();
+        new TreeMap<String, String>();
 
     private static final Pattern SPECIAL =
         Pattern.compile("[\\(\\)<>@,;:\\\\\"/\\[\\]\\?=]");
@@ -47,22 +48,37 @@
         new MediaType("application", "xml", NO_PARAMETERS);
 
     /**
-     * Parses the given string to a media type. The string is expected
-     * to be of the form "type/subtype(; parameter=...)*" as defined
-     * in RFC 2045.
-     * <p>
-     * Note that currently this method only parses the "type/subtype" part
-     * of the string. Any parameters are simply discarded. TODO: Change this.
-     *
-     * @param string media type string to be parsed
+     * Parses the given string to a media type. The string is expected to be of
+     * the form "type/subtype(; parameter=...)*" as defined in RFC 2045.
+     * 
+     * @param string
+     *            media type string to be parsed
      * @return parsed media type, or <code>null</code> if parsing fails
      */
     public static MediaType parse(String string) {
         int colon = string.indexOf(';');
-        if (colon != -1) {
-            string = string.substring(0, colon);
-        }
+        if (colon != -1 && colon != string.length()-1) {
+            String primarySubString = string.substring(0, colon);
+            String parameters = string
+                    .substring(colon + 1, string.length());
+
+            MediaType type = parseNoParams(primarySubString);
+            String[] paramBases = parameters.split(";");
+            for (int i = 0; i < paramBases.length; i++) {
+                String[] paramToks = paramBases[i].split("=");
+                String paramName = paramToks[0].trim();
+                String paramValue = paramToks[1].trim();
+                type.parameters.put(paramName, paramValue);
+            }
+
+            return type;
+
+        } else
+            return parseNoParams(string);
+
+    }
 
+    private static MediaType parseNoParams(String string) {
         int slash = string.indexOf('/');
         if (slash != -1) {
             String type = string.substring(0, slash).trim();

Modified: lucene/tika/trunk/tika-core/src/test/java/org/apache/tika/detect/TypeDetectorTest.java
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/tika-core/src/test/java/org/apache/tika/detect/TypeDetectorTest.java?rev=793417&r1=793416&r2=793417&view=diff
==============================================================================
--- lucene/tika/trunk/tika-core/src/test/java/org/apache/tika/detect/TypeDetectorTest.java (original)
+++ lucene/tika/trunk/tika-core/src/test/java/org/apache/tika/detect/TypeDetectorTest.java Sun Jul 12 22:12:52 2009
@@ -17,6 +17,8 @@
 package org.apache.tika.detect;
 
 import java.io.IOException;
+import java.util.Map;
+import java.util.TreeMap;
 
 import junit.framework.TestCase;
 
@@ -29,13 +31,22 @@
 public class TypeDetectorTest extends TestCase {
 
     private Detector detector = new TypeDetector();
+    
+    private static final Map<String, String> params = new
+        TreeMap<String, String>();
+    static{
+        params.put("a", "b");
+    }
+    
+    private static final MediaType TEXT_PLAIN_A_EQ_B = 
+          new MediaType("text", "plain", params);
 
     public void testDetect() {
         assertDetect(MediaType.TEXT_PLAIN, "text/plain");
         assertDetect(MediaType.TEXT_PLAIN, "TEXT/PLAIN");
         assertDetect(MediaType.TEXT_PLAIN, " text/\tplain\n");
-        assertDetect(MediaType.TEXT_PLAIN, "text/plain; a=b");
-        assertDetect(MediaType.TEXT_PLAIN, "\ttext/plain; a=b\n");
+        assertDetect(TEXT_PLAIN_A_EQ_B, "text/plain; a=b");
+        assertDetect(TEXT_PLAIN_A_EQ_B, "\ttext/plain; a=b\n");
 
         assertDetect(MediaType.OCTET_STREAM, "text\\plain");
 

Modified: lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/MediaTypeTest.java
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/MediaTypeTest.java?rev=793417&r1=793416&r2=793417&view=diff
==============================================================================
--- lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/MediaTypeTest.java (original)
+++ lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/MediaTypeTest.java Sun Jul 12 22:12:52 2009
@@ -106,6 +106,55 @@
                 + "; c=\"\\(\\)\\<\\>\\@\\,\\;\\:\\\\\\\"\\/\\[\\]\\?\\=\"",
                 new MediaType("text", "plain", parameters).toString());
     }
+    
+    /**
+     * @since TIKA-121
+     */
+    public void testParseWithParams() {
+        String mimeStringWithParams = "text/html;charset=UTF-8;foo=bar;foo2=bar2";
+
+        MediaType type = MediaType.parse(mimeStringWithParams);
+        assertNotNull(type);
+        assertNotNull(type.getParameters());
+        assertNotNull(type.getParameters().keySet());
+        assertEquals(3, type.getParameters().keySet().size());
+        boolean gotCharset = false, gotFoo = false, gotFoo2 = false;
+        for (String param : type.getParameters().keySet()) {
+            if (param.equals("charset")) {
+                gotCharset = true;
+            } else if (param.equals("foo")) {
+                gotFoo = true;
+            } else if (param.equals("foo2")) {
+                gotFoo2 = true;
+            }
+        }
+        assertTrue(gotCharset && gotFoo && gotFoo2);
+    }
+
+    /**
+     * @since TIKA-121
+     */
+    public void testParseNoParams() {
+        String mimeStringNoParams = "text/html";
+
+        MediaType type = MediaType.parse(mimeStringNoParams);
+        assertNotNull(type);
+        assertNotNull(type.getParameters());
+        assertNotNull(type.getParameters().keySet());
+        assertEquals(0, type.getParameters().keySet().size());
+    }
+
+    /**
+     * @since TIKA-121
+     */
+    public void testParseNoParamsWithSemi() {
+        String mimeStringNoParamsWithSemi = "text/html;";
+        MediaType type = MediaType.parse(mimeStringNoParamsWithSemi);
+        assertNotNull(type);
+        assertNotNull(type.getParameters());
+        assertNotNull(type.getParameters().keySet());
+        assertEquals(0, type.getParameters().keySet().size());
+    }
 
     
 }