You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by lf...@apache.org on 2017/09/08 15:37:04 UTC

[tika] branch master updated: TIKA-2460: load custom mimetypes XML from sys prop

This is an automated email from the ASF dual-hosted git repository.

lfcnassif pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/tika.git


The following commit(s) were added to refs/heads/master by this push:
     new 70ca280  TIKA-2460: load custom mimetypes XML from sys prop
70ca280 is described below

commit 70ca280f11fe4127df290b8027c6bc1d5180271f
Author: lfcnassif <lf...@gmail.com>
AuthorDate: Fri Sep 8 12:36:48 2017 -0300

    TIKA-2460: load custom mimetypes XML from sys prop
---
 CHANGES.txt                                        |  3 +++
 .../org/apache/tika/mime/MimeTypesFactory.java     | 19 +++++++++++++++++++
 .../org/apache/tika/mime/MimeTypesReaderTest.java  | 16 ++++++++++++++++
 .../org/apache/tika/mime/external-mimetypes.xml    | 22 ++++++++++++++++++++++
 4 files changed, 60 insertions(+)

diff --git a/CHANGES.txt b/CHANGES.txt
index 5161e65..f7d0521 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1,5 +1,8 @@
 Release 1.17 - ???
 
+  * Load external custom mimetypes XML from system property 
+    tika.custom-mimetypes (TIKA-2460). 
+
   * Extract number of tiffs in a multi-page tiff (TIKA-2451).
 
   * Fix detection of emails extracted from mbox (TIKA-2456).
diff --git a/tika-core/src/main/java/org/apache/tika/mime/MimeTypesFactory.java b/tika-core/src/main/java/org/apache/tika/mime/MimeTypesFactory.java
index ac434fd..c6ffd25 100644
--- a/tika-core/src/main/java/org/apache/tika/mime/MimeTypesFactory.java
+++ b/tika-core/src/main/java/org/apache/tika/mime/MimeTypesFactory.java
@@ -16,6 +16,7 @@
  */
 package org.apache.tika.mime;
 
+import java.io.File;
 import java.io.InputStream;
 import java.io.IOException;
 import java.net.URL;
@@ -29,6 +30,12 @@ import org.w3c.dom.Document;
  * Creates instances of MimeTypes.
  */
 public class MimeTypesFactory {
+    
+    /**
+     * System property to set a path to an additional external custom mimetypes 
+     * XML file to be loaded.
+     */
+    public static final String CUSTOM_MIMES_SYS_PROP = "tika.custom-mimetypes";
 
     /**
      * Creates an empty instance; same as calling new MimeTypes().
@@ -139,6 +146,8 @@ public class MimeTypesFactory {
      *  override mimetypes found will loaded afterwards.
      * The file paths will be interpreted by the specified class  
      *  loader in getResource().
+     *  It will also load custom mimetypes from the system property
+     *  {@link #CUSTOM_MIMES_SYS_PROP}, if specified.
      * 
      * @param coreFilePath The main MimeTypes file to load
      * @param extensionFilePath The name of extension MimeType files to load afterwards
@@ -167,6 +176,16 @@ public class MimeTypesFactory {
         urls.add(coreURL);
         urls.addAll(extensionURLs);
         
+        String customMimesPath = System.getProperty(CUSTOM_MIMES_SYS_PROP);
+        if(customMimesPath != null){
+            File externalFile = new File(customMimesPath);
+            if(!externalFile.exists())
+                throw new IOException(
+                        "Specified custom mimetypes file not found: " + customMimesPath);
+            URL externalURL = externalFile.toURI().toURL();
+            urls.add(externalURL);
+        }
+        
         return create( urls.toArray(new URL[urls.size()]) );
     }
 }
diff --git a/tika-core/src/test/java/org/apache/tika/mime/MimeTypesReaderTest.java b/tika-core/src/test/java/org/apache/tika/mime/MimeTypesReaderTest.java
index bddaf1a..8782167 100644
--- a/tika-core/src/test/java/org/apache/tika/mime/MimeTypesReaderTest.java
+++ b/tika-core/src/test/java/org/apache/tika/mime/MimeTypesReaderTest.java
@@ -233,6 +233,22 @@ public class MimeTypesReaderTest {
        }
     }
     
+    private class CustomClassLoader extends ClassLoader{
+    }
+    
+    /**
+     * TIKA-2460 Test loading of custom-mimetypes.xml from sys prop.
+     */
+    @Test
+    public void testExternalMimeTypes() throws Exception {
+        System.setProperty(MimeTypesFactory.CUSTOM_MIMES_SYS_PROP, 
+                "src/test/resources/org/apache/tika/mime/external-mimetypes.xml");
+        MimeTypes mimeTypes = MimeTypes.getDefaultMimeTypes(new CustomClassLoader());
+        Metadata m = new Metadata();
+        m.add(Metadata.RESOURCE_NAME_KEY, "test.external.mime.type");
+        assertEquals("external/mime-type", mimeTypes.detect(null, m).toString());
+    }
+    
     @Test
     public void testGetExtensionForPowerPoint() throws Exception {
         MimeType mt = this.mimeTypes.forName("application/vnd.ms-powerpoint");
diff --git a/tika-core/src/test/resources/org/apache/tika/mime/external-mimetypes.xml b/tika-core/src/test/resources/org/apache/tika/mime/external-mimetypes.xml
new file mode 100644
index 0000000..04d01d7
--- /dev/null
+++ b/tika-core/src/test/resources/org/apache/tika/mime/external-mimetypes.xml
@@ -0,0 +1,22 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<mime-info>
+  <mime-type type="external/mime-type">
+     <glob pattern="*.external.mime.type" />
+  </mime-type>
+</mime-info>

-- 
To stop receiving notification emails like this one, please contact
"commits@tika.apache.org" <co...@tika.apache.org>.