You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ni...@apache.org on 2013/02/04 17:36:42 UTC

svn commit: r1442168 - in /tika/trunk: tika-core/src/main/java/org/apache/tika/mime/ tika-core/src/main/resources/org/apache/tika/mime/ tika-core/src/test/java/org/apache/tika/mime/ tika-parsers/src/test/java/org/apache/tika/mime/

Author: nick
Date: Mon Feb  4 16:36:42 2013
New Revision: 1442168

URL: http://svn.apache.org/viewvc?rev=1442168&view=rev
Log:
Support tika:link and tika:uti mimetype extensions, along with unit tests. Modified version of the patch from TIKA-1012

Modified:
    tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MimeType.java
    tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MimeTypesReader.java
    tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MimeTypesReaderMetKeys.java
    tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
    tika/trunk/tika-core/src/test/java/org/apache/tika/mime/MimeTypesReaderTest.java
    tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/MimeTypeTest.java

Modified: tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MimeType.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MimeType.java?rev=1442168&r1=1442167&r2=1442168&view=diff
==============================================================================
--- tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MimeType.java (original)
+++ tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MimeType.java Mon Feb  4 16:36:42 2013
@@ -17,7 +17,9 @@
 package org.apache.tika.mime;
 
 import java.io.Serializable;
+import java.net.URI;
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.Collections;
 import java.util.List;
 
@@ -75,6 +77,21 @@ public final class MimeType implements C
     private final MediaType type;
 
     /**
+     * The MimeType acronym
+     */
+    private String acronym = "";
+
+    /**
+     * The Uniform Type Identifier (http://en.wikipedia.org/wiki/Uniform_Type_Identifier)
+     */
+    private String uti = "";
+    
+    /**
+     * Documentation Links
+     */
+    private List<URI> links = Collections.emptyList();
+    
+    /**
      * Description of this media type.
      */
     private String description = "";
@@ -148,6 +165,75 @@ public final class MimeType implements C
         }
         this.description = description;
     }
+    
+
+    /**
+     * Returns an acronym for this mime type.
+     *
+     * @return mime type acronym
+     */
+    public String getAcronym() {
+        return acronym;
+    }
+
+    /**
+     * Set an acronym for the mime type
+     *
+     * @param v the acronym to set; must not be null
+     */
+    void setAcronym(String v) {
+        if (v == null) {
+            throw new IllegalArgumentException("Acronym is missing");
+        }
+        acronym = v;
+    }
+    
+    /**
+     * Get the UTI for this mime type.
+     * 
+     * @see <a href="http://en.wikipedia.org/wiki/Uniform_Type_Identifier">Uniform Type Identifier</a>
+     * 
+     * @return The Uniform Type Identifier
+     */
+    public String getUniformTypeIdentifier() {
+        return uti;
+    }
+
+    /**
+     * Set The Uniform Type Identifier
+     *
+     * @param v the Uniform Type Identifier to set; must not be null
+     */
+    void setUniformTypeIdentifier(String v) {
+        if (v == null) {
+            throw new IllegalArgumentException("Uniform Type Identifier is missing");
+        }
+        uti = v;
+    }
+
+    /**
+     * Get a list of links to help document this mime type
+     * 
+     * @return an unmodifiable list of links (will never be null)
+     */
+    public List<URI> getLinks() {
+      return links; // this is already unmodifiable
+    }
+
+    /**
+     * Add a link to this mime type
+     * @param link the link to add; must not be null
+     */
+    void addLink(URI link) {
+        if(link==null) {
+            throw new IllegalArgumentException("Missing Link");
+        }
+        List<URI> copy = new ArrayList<URI>(links.size()+1);
+        copy.addAll(links);
+        copy.add(link);
+        links = Collections.unmodifiableList(copy);
+    }
+
 
     /**
      * Add some rootXML info to this mime-type

Modified: tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MimeTypesReader.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MimeTypesReader.java?rev=1442168&r1=1442167&r2=1442168&view=diff
==============================================================================
--- tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MimeTypesReader.java (original)
+++ tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MimeTypesReader.java Mon Feb  4 16:36:42 2013
@@ -19,9 +19,10 @@ package org.apache.tika.mime;
 import java.io.ByteArrayInputStream;
 import java.io.IOException;
 import java.io.InputStream;
+import java.net.URI;
+import java.net.URISyntaxException;
 import java.util.ArrayList;
 import java.util.Collections;
-import java.util.LinkedList;
 import java.util.List;
 
 import javax.xml.parsers.ParserConfigurationException;
@@ -89,6 +90,11 @@ import org.xml.sax.helpers.DefaultHandle
  *         type CDATA #REQUIRED&gt;
  *  ]&gt;
  * </pre>
+ * 
+ * In addition to the standard fields, this will also read two Tika-specific elements:
+ *  - tika:link
+ *  - tika:uti
+ * 
  *
  * @see http://freedesktop.org/wiki/Standards_2fshared_2dmime_2dinfo_2dspec
  */
@@ -154,7 +160,10 @@ class MimeTypesReader extends DefaultHan
         } else if (SUB_CLASS_OF_TAG.equals(qName)) {
             String parent = attributes.getValue(SUB_CLASS_TYPE_ATTR);
             types.setSuperType(type, MediaType.parse(parent));
-        } else if (COMMENT_TAG.equals(qName)) {
+        } else if (ACRONYM_TAG.equals(qName)||
+                   COMMENT_TAG.equals(qName)||
+                   TIKA_LINK_TAG.equals(qName)||
+                   TIKA_UTI_TAG.equals(qName)) {
             characters = new StringBuilder();
         } else if (GLOB_TAG.equals(qName)) {
             String pattern = attributes.getValue(PATTERN_ATTR);
@@ -199,6 +208,20 @@ class MimeTypesReader extends DefaultHan
             } else if (COMMENT_TAG.equals(qName)) {
                 type.setDescription(characters.toString().trim());
                 characters = null;
+            } else if (ACRONYM_TAG.equals(qName)) {
+                type.setAcronym(characters.toString().trim());
+                characters = null;
+            } else if (TIKA_UTI_TAG.equals(qName)) {
+                type.setUniformTypeIdentifier(characters.toString().trim());
+                characters = null;
+            } else if (TIKA_LINK_TAG.equals(qName)) {
+                try {
+                    type.addLink(new URI(characters.toString().trim()));
+                } 
+                catch (URISyntaxException e) {
+                    throw new IllegalArgumentException("unable to parse link: "+characters, e);
+                }
+                characters = null;
             } else if (MATCH_TAG.equals(qName)) {
                 current.stop();
             } else if (MAGIC_TAG.equals(qName)) {

Modified: tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MimeTypesReaderMetKeys.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MimeTypesReaderMetKeys.java?rev=1442168&r1=1442167&r2=1442168&view=diff
==============================================================================
--- tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MimeTypesReaderMetKeys.java (original)
+++ tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MimeTypesReaderMetKeys.java Mon Feb  4 16:36:42 2013
@@ -27,6 +27,8 @@ public interface MimeTypesReaderMetKeys 
 
     String MIME_TYPE_TYPE_ATTR = "type";
 
+    String ACRONYM_TAG = "acronym";
+
     String COMMENT_TAG = "_comment";
 
     String GLOB_TAG = "glob";
@@ -63,4 +65,7 @@ public interface MimeTypesReaderMetKeys 
 
     String LOCAL_NAME_ATTR = "localName";
 
+    String TIKA_LINK_TAG = "tika:link";
+    
+    String TIKA_UTI_TAG = "tika:uti";
 }

Modified: tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml?rev=1442168&r1=1442167&r2=1442168&view=diff
==============================================================================
--- tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml (original)
+++ tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml Mon Feb  4 16:36:42 2013
@@ -3752,6 +3752,8 @@
     <alias type="image/bmp"/>
     <acronym>BMP</acronym>
     <_comment>Windows bitmap</_comment>
+    <tika:link>http://en.wikipedia.org/wiki/BMP_file_format</tika:link>
+    <tika:uti>com.microsoft.bmp</tika:uti>
     <magic priority="50">
       <match value="BM" type="string" offset="0">
         <match value="0x0100" type="string" offset="26">

Modified: tika/trunk/tika-core/src/test/java/org/apache/tika/mime/MimeTypesReaderTest.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/test/java/org/apache/tika/mime/MimeTypesReaderTest.java?rev=1442168&r1=1442167&r2=1442168&view=diff
==============================================================================
--- tika/trunk/tika-core/src/test/java/org/apache/tika/mime/MimeTypesReaderTest.java (original)
+++ tika/trunk/tika-core/src/test/java/org/apache/tika/mime/MimeTypesReaderTest.java Mon Feb  4 16:36:42 2013
@@ -119,6 +119,17 @@ public class MimeTypesReaderTest extends
     }
     
     /**
+     * TIKA-1012 Ensures that the acronym, UTI and link metadata are read
+     */
+    public void testReadExtendedMetadata() throws Exception {
+        MimeType bmp = this.mimeTypes.forName("image/x-ms-bmp");
+        assertEquals("BMP", bmp.getAcronym());
+        assertEquals("com.microsoft.bmp", bmp.getUniformTypeIdentifier());
+        assertEquals("http://en.wikipedia.org/wiki/BMP_file_format", 
+            bmp.getLinks().get(0).toString());
+    }
+    
+    /**
      * TIKA-746 Ensures that the custom mimetype maps were also 
      *  loaded and used
      */

Modified: tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/MimeTypeTest.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/MimeTypeTest.java?rev=1442168&r1=1442167&r2=1442168&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/MimeTypeTest.java (original)
+++ tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/MimeTypeTest.java Mon Feb  4 16:36:42 2013
@@ -63,13 +63,34 @@ public class MimeTypeTest extends TestCa
     }
 
     /** Test MimeType setDescription() */
-    public void testSetDescription() {
+    public void testSetEmptyValues() {
         try {
             text.setDescription(null);
             fail("Expected IllegalArgumentException");
         } catch (IllegalArgumentException e) {
             // expected result
         }
+        
+        try {
+            text.setAcronym(null);
+            fail("Expected IllegalArgumentException");
+        } catch (IllegalArgumentException e) {
+            // expected result
+        }
+        
+        try {
+            text.addLink(null);
+            fail("Expected IllegalArgumentException");
+        } catch (IllegalArgumentException e) {
+            // expected result
+        }
+
+        try {
+            text.setUniformTypeIdentifier(null);
+            fail("Expected IllegalArgumentException");
+        } catch (IllegalArgumentException e) {
+            // expected result
+        }
     }
 
 }