You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2016/03/22 02:19:24 UTC

[12/13] tika git commit: TIKA-1855 -- first pass. Need to turn back on the forbidden-apis testCheck. More clean up remains.

TIKA-1855 -- first pass.  Need to turn back on the forbidden-apis testCheck.  More clean up remains.


Project: http://git-wip-us.apache.org/repos/asf/tika/repo
Commit: http://git-wip-us.apache.org/repos/asf/tika/commit/aa5f60d7
Tree: http://git-wip-us.apache.org/repos/asf/tika/tree/aa5f60d7
Diff: http://git-wip-us.apache.org/repos/asf/tika/diff/aa5f60d7

Branch: refs/heads/2.x
Commit: aa5f60d7a0ac0a6a9d739344c76b10940132503f
Parents: 41915dc
Author: tballison <ta...@mitre.org>
Authored: Mon Mar 21 21:18:00 2016 -0400
Committer: tballison <ta...@mitre.org>
Committed: Mon Mar 21 21:18:05 2016 -0400

----------------------------------------------------------------------
 pom.xml                                         |    7 +-
 tika-app/pom.xml                                |   15 +
 .../batch/builders/AppParserFactoryBuilder.java |    2 +-
 .../main/java/org/apache/tika/cli/TikaCLI.java  |    2 +-
 .../main/java/org/apache/tika/gui/TikaGUI.java  |    2 +-
 .../tika/config/TikaDetectorConfigTest.java     |  143 +++
 .../tika/config/TikaParserConfigTest.java       |  155 +++
 .../tika/config/TikaTranslatorConfigTest.java   |   73 ++
 .../tika/detect/TestContainerAwareDetector.java |  410 +++++++
 .../tika/embedder/ExternalEmbedderTest.java     |  285 +++++
 .../java/org/apache/tika/mime/MimeTypeTest.java |  108 ++
 .../org/apache/tika/mime/MimeTypesTest.java     |  122 ++
 .../org/apache/tika/mime/TestMimeTypes.java     | 1044 +++++++++++++++++
 .../tika/parser/AutoDetectParserTest.java       |  459 ++++++++
 .../apache/tika/parser/DigestingParserTest.java |  139 +++
 .../apache/tika/parser/ParsingReaderTest.java   |  104 ++
 .../tika/parser/RecursiveParserWrapperTest.java |  312 ++++++
 .../org/apache/tika/parser/TestParsers.java     |  133 +++
 .../parser/fork/ForkParserIntegrationTest.java  |  268 +++++
 .../apache/tika/parser/mock/MockParserTest.java |  251 +++++
 .../org/apache/tika/parser/pkg/PackageTest.java |  335 ++++++
 .../sax/PhoneExtractingContentHandlerTest.java  |   58 +
 .../tika/utils/ServiceLoaderUtilsTest.java      |   57 +
 tika-core/pom.xml                               |   19 +
 .../tika/parser/digesting/CommonsDigester.java  |  295 +++++
 .../src/test/java/org/apache/tika/TikaTest.java |   74 +-
 .../tika/detect/MimeDetectionWithNNTest.java    |    8 +-
 .../org/apache/tika/mime/MimeDetectionTest.java |    7 +-
 .../mime/ProbabilisticMimeDetectionTest.java    |    7 +-
 .../ProbabilisticMimeDetectionTestWithTika.java |    7 +-
 .../java/org/apache/tika/osgi/BundleIT.java     |   11 -
 .../GLDAS_CLM10SUBP_3H.A19790202.0000.001.grb   |  Bin 1362900 -> 0 bytes
 .../org/apache/tika/mime/brwNIMS_2014.dif       |   56 -
 .../apache/tika/mime/circles-with-prefix.svg    |    8 -
 .../resources/org/apache/tika/mime/circles.svg  |    8 -
 .../org/apache/tika/mime/datamatrix.png         |  Bin 204 -> 0 bytes
 .../tika/mime/gdas1.forecmwf.2014062612.grib2   |  Bin 2489194 -> 0 bytes
 .../resources/org/apache/tika/mime/htmlfragment |   18 -
 .../apache/tika/mime/plotutils-bin-cgm-v3.cgm   |  Bin 1744 -> 0 bytes
 .../org/apache/tika/mime/stylesheet.xsl         |    9 -
 .../apache/tika/mime/test-difficult-rdf1.xml    |   39 -
 .../apache/tika/mime/test-difficult-rdf2.xml    |   44 -
 .../org/apache/tika/mime/test-iso-8859-1.xml    |    2 -
 .../org/apache/tika/mime/test-long-comment.xml  |   21 -
 .../tika/mime/test-malformed-header.html.bin    |  Bin 305 -> 0 bytes
 .../org/apache/tika/mime/test-tika-327.html     |   50 -
 .../org/apache/tika/mime/test-utf16be.xml       |  Bin 126 -> 0 bytes
 .../org/apache/tika/mime/test-utf16le.xml       |  Bin 126 -> 0 bytes
 .../org/apache/tika/mime/test-utf8-bom.xml      |    2 -
 .../org/apache/tika/mime/test-utf8.xml          |    2 -
 .../resources/org/apache/tika/mime/test.html    |   10 -
 .../resources/org/apache/tika/mime/test.xls     |  Bin 13824 -> 0 bytes
 .../org/apache/tika/mime/testlargerbuffer.html  |  827 --------------
 tika-parent/pom.xml                             |    3 +-
 tika-parser-modules/pom.xml                     |   26 -
 .../tika/parser/ner/NamedEntityParserTest.java  |   16 +-
 .../parser/ner/regex/RegexNERecogniserTest.java |   15 +-
 .../apache/tika/parser/ner/regex/ner-regex.txt  |   17 +
 .../tika/parser/ner/tika-config-for-ner.xml     |   27 +
 .../tika/parser/jdbc/SQLite3ParserTest.java     |   50 +-
 .../tika/parser/chm/TestChmExtraction.java      |   25 +-
 .../tika/parser/microsoft/ExcelParserTest.java  |  387 +++----
 .../apache/tika/parser/odf/ODFParserTest.java   |  460 ++++----
 .../apache/tika/parser/rtf/RTFParserTest.java   |  163 +--
 .../apache/tika/parser/pdf/PDFParserTest.java   |  133 +--
 .../tika/parser/isatab/ISArchiveParser.java     |    3 +-
 .../apache/tika/parser/netcdf/NetCDFParser.java |   17 +-
 .../apache/tika/parser/dif/DIFParserTest.java   |   31 +-
 .../tika/parser/envi/EnviHeaderParserTest.java  |   36 +-
 .../apache/tika/parser/gdal/TestGDALParser.java |   34 +-
 .../tika/parser/geo/topic/GeoParserTest.java    |   23 +-
 .../GeographicInformationParserTest.java        |   50 +-
 .../apache/tika/parser/grib/GribParserTest.java |   30 +-
 .../apache/tika/parser/hdf/HDFParserTest.java   |   44 +-
 .../tika/parser/isatab/ISArchiveParserTest.java |   80 +-
 .../apache/tika/parser/mat/MatParserTest.java   |   60 +-
 .../tika/parser/netcdf/NetCDFParserTest.java    |   48 +-
 .../tika/parser/strings/StringsParserTest.java  |   23 +-
 .../tika/parser/txt/CharsetDetectorTest.java    |    7 +-
 .../apache/tika/parser/txt/TXTParserTest.java   |   51 +-
 .../apache/tika/parser/xml/DcXMLParserTest.java |   28 +-
 .../EmptyAndDuplicateElementsXMLParserTest.java |   60 +-
 .../tika/parser/xml/FictionBookParserTest.java  |   19 +-
 tika-parsers/pom.xml                            |  333 ------
 .../main/appended-resources/META-INF/LICENSE    |   94 --
 .../apache/tika/parser/internal/Activator.java  |   54 -
 .../tika/parser/utils/CommonsDigester.java      |  299 -----
 .../test/java/org/apache/tika/TestParsers.java  |  109 --
 .../tika/config/TikaDetectorConfigTest.java     |  143 ---
 .../tika/config/TikaParserConfigTest.java       |  157 ---
 .../tika/config/TikaTranslatorConfigTest.java   |   72 --
 .../tika/detect/TestContainerAwareDetector.java |  410 -------
 .../tika/embedder/ExternalEmbedderTest.java     |  292 -----
 .../java/org/apache/tika/mime/MimeTypeTest.java |  105 --
 .../org/apache/tika/mime/MimeTypesTest.java     |  122 --
 .../org/apache/tika/mime/TestMimeTypes.java     | 1047 ------------------
 .../tika/parser/AutoDetectParserTest.java       |  459 --------
 .../apache/tika/parser/DigestingParserTest.java |  136 ---
 .../apache/tika/parser/ParsingReaderTest.java   |  104 --
 .../tika/parser/RecursiveParserWrapperTest.java |  312 ------
 .../parser/fork/ForkParserIntegrationTest.java  |  268 -----
 .../apache/tika/parser/mock/MockParserTest.java |  251 -----
 .../org/apache/tika/parser/pkg/PackageTest.java |  335 ------
 .../sax/PhoneExtractingContentHandlerTest.java  |   58 -
 .../tika/utils/ServiceLoaderUtilsTest.java      |   57 -
 tika-server/pom.xml                             |    8 +-
 .../org/apache/tika/server/TikaServerCli.java   |    2 +-
 .../org/apache/tika/server/CXFTestBase.java     |   14 +-
 .../tika/server/DetectorResourceTest.java       |    6 +-
 .../tika/server/LanguageResourceTest.java       |    4 +-
 .../tika/server/MetadataResourceTest.java       |   26 +-
 .../server/RecursiveMetadataResourceTest.java   |   36 +-
 .../apache/tika/server/StackTraceOffTest.java   |    8 +-
 .../org/apache/tika/server/StackTraceTest.java  |    8 +-
 .../org/apache/tika/server/TikaParsersTest.java |   12 +-
 .../apache/tika/server/TikaResourceTest.java    |   23 +-
 .../tika/server/UnpackerResourceTest.java       |   20 +-
 tika-server/src/test/resources/2exe.docx        |  Bin 715333 -> 0 bytes
 tika-server/src/test/resources/2pic.doc         |  Bin 4339712 -> 0 bytes
 tika-server/src/test/resources/2pic.docx        |  Bin 883427 -> 0 bytes
 .../src/test/resources/CDEC_WEATHER_2010_03_02  |   98 --
 tika-server/src/test/resources/Doc1_ole.doc     |  Bin 89600 -> 0 bytes
 tika-server/src/test/resources/english.txt      |    1 -
 tika-server/src/test/resources/foo.csv          |    4 -
 tika-server/src/test/resources/french.txt       |    1 -
 .../test/resources/mime/custom-mimetypes.xml    |   24 -
 .../src/test/resources/mock/null_pointer.xml    |   25 -
 .../org/apache/tika/mime/custom-mimetypes.xml   |   24 +
 tika-server/src/test/resources/password.xls     |  Bin 22528 -> 0 bytes
 tika-server/src/test/resources/pic.xls          |  Bin 593920 -> 0 bytes
 tika-server/src/test/resources/pic.xlsx         |  Bin 580188 -> 0 bytes
 tika-server/src/test/resources/test.doc         |  Bin 9216 -> 0 bytes
 .../testRTF_npeFromWMFInTikaServer.rtf          |  235 ----
 .../test/resources/test_recursive_embedded.docx |  Bin 27082 -> 0 bytes
 tika-test-resources/pom.xml                     |    7 -
 .../apache/tika/parser/ner/regex/ner-regex.txt  |   17 -
 .../org/apache/tika/parser/ner/tika-config.xml  |   27 -
 .../src/test/resources/test-documents/2exe.docx |  Bin 0 -> 715333 bytes
 .../src/test/resources/test-documents/2pic.doc  |  Bin 0 -> 4339712 bytes
 .../src/test/resources/test-documents/2pic.docx |  Bin 0 -> 883427 bytes
 .../test-documents/CDEC_WEATHER_2010_03_02      |   98 ++
 .../resources/test-documents/brwNIMS_2014.dif   |   56 +
 .../test-documents/circles-with-prefix.svg      |    8 +
 .../test/resources/test-documents/circles.svg   |    8 +
 .../resources/test-documents/datamatrix.png     |  Bin 0 -> 204 bytes
 .../test/resources/test-documents/english.txt   |    1 +
 .../src/test/resources/test-documents/foo.csv   |    4 +
 .../test/resources/test-documents/french.txt    |    1 +
 .../test/resources/test-documents/htmlfragment  |   18 +
 .../test-documents/mock/null_pointer.xml        |    4 +-
 .../test/resources/test-documents/password.xls  |  Bin 0 -> 22528 bytes
 .../src/test/resources/test-documents/pic.xls   |  Bin 0 -> 593920 bytes
 .../src/test/resources/test-documents/pic.xlsx  |  Bin 0 -> 580188 bytes
 .../test-documents/plotutils-bin-cgm-v3.cgm     |  Bin 0 -> 1744 bytes
 .../resources/test-documents/stylesheet.xsl     |    9 +
 .../test-documents/test-difficult-rdf1.xml      |   39 +
 .../test-documents/test-difficult-rdf2.xml      |   44 +
 .../test-documents/test-iso-8859-1.xml          |    2 +
 .../test-documents/test-long-comment.xml        |   21 +
 .../resources/test-documents/test-tika-327.html |   50 +
 .../resources/test-documents/test-utf16be.xml   |  Bin 0 -> 126 bytes
 .../resources/test-documents/test-utf16le.xml   |  Bin 0 -> 126 bytes
 .../resources/test-documents/test-utf8-bom.xml  |    2 +
 .../test/resources/test-documents/test-utf8.xml |    2 +
 .../src/test/resources/test-documents/test.html |   10 +
 .../src/test/resources/test-documents/test.xls  |  Bin 0 -> 13824 bytes
 .../testRTF_npeFromWMFInTikaServer.rtf          |  235 ++++
 .../test-documents/testlargerbuffer.html        |  827 ++++++++++++++
 168 files changed, 7231 insertions(+), 8029 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index c790244..ea4f114 100644
--- a/pom.xml
+++ b/pom.xml
@@ -46,9 +46,10 @@
 
   <modules>
     <module>tika-parent</module>
-    <module>tika-core</module>
     <module>tika-test-resources</module>
-    <module>tika-parsers</module>
+    <module>tika-core</module>
+    <module>tika-parser-modules</module>
+    <module>tika-parser-bundles</module>
     <module>tika-xmp</module>
     <module>tika-serialization</module>
     <module>tika-batch</module>
@@ -59,8 +60,6 @@
     <module>tika-langdetect</module>
     <module>tika-example</module>
     <module>tika-java7</module>
-    <module>tika-parser-modules</module>
-    <module>tika-parser-bundles</module>
   </modules>
 
   <profiles>

http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-app/pom.xml
----------------------------------------------------------------------
diff --git a/tika-app/pom.xml b/tika-app/pom.xml
index e362391..9177afb 100644
--- a/tika-app/pom.xml
+++ b/tika-app/pom.xml
@@ -101,6 +101,21 @@
       <groupId>commons-io</groupId>
       <version>${commons.io.version}</version>
     </dependency>
+    <!-- test dependencies -->
+    <dependency>
+      <groupId>${project.groupId}</groupId>
+      <artifactId>tika-test-resources</artifactId>
+      <version>${project.version}</version>
+      <type>test-jar</type>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>${project.groupId}</groupId>
+      <artifactId>tika-core</artifactId>
+      <version>${project.version}</version>
+      <type>test-jar</type>
+      <scope>test</scope>
+    </dependency>
   </dependencies>
 
   <build>

http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-app/src/main/java/org/apache/tika/batch/builders/AppParserFactoryBuilder.java
----------------------------------------------------------------------
diff --git a/tika-app/src/main/java/org/apache/tika/batch/builders/AppParserFactoryBuilder.java b/tika-app/src/main/java/org/apache/tika/batch/builders/AppParserFactoryBuilder.java
index 998f649..98f4343 100644
--- a/tika-app/src/main/java/org/apache/tika/batch/builders/AppParserFactoryBuilder.java
+++ b/tika-app/src/main/java/org/apache/tika/batch/builders/AppParserFactoryBuilder.java
@@ -23,7 +23,7 @@ import java.util.Map;
 import org.apache.tika.batch.DigestingAutoDetectParserFactory;
 import org.apache.tika.batch.ParserFactory;
 import org.apache.tika.parser.DigestingParser;
-import org.apache.tika.parser.utils.CommonsDigester;
+import org.apache.tika.parser.digesting.CommonsDigester;
 import org.apache.tika.util.ClassLoaderUtil;
 import org.apache.tika.util.XMLDOMUtil;
 import org.w3c.dom.Node;

http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java
----------------------------------------------------------------------
diff --git a/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java b/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java
index 314599e..a2b91c9 100644
--- a/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java
+++ b/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java
@@ -101,7 +101,7 @@ import org.apache.tika.parser.ParserDecorator;
 import org.apache.tika.parser.PasswordProvider;
 import org.apache.tika.parser.RecursiveParserWrapper;
 import org.apache.tika.parser.html.BoilerpipeContentHandler;
-import org.apache.tika.parser.utils.CommonsDigester;
+import org.apache.tika.parser.digesting.CommonsDigester;
 import org.apache.tika.sax.BasicContentHandlerFactory;
 import org.apache.tika.sax.BodyContentHandler;
 import org.apache.tika.sax.ContentHandlerFactory;

http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-app/src/main/java/org/apache/tika/gui/TikaGUI.java
----------------------------------------------------------------------
diff --git a/tika-app/src/main/java/org/apache/tika/gui/TikaGUI.java b/tika-app/src/main/java/org/apache/tika/gui/TikaGUI.java
index 5ecc763..1bc9405 100644
--- a/tika-app/src/main/java/org/apache/tika/gui/TikaGUI.java
+++ b/tika-app/src/main/java/org/apache/tika/gui/TikaGUI.java
@@ -76,7 +76,7 @@ import org.apache.tika.parser.ParseContext;
 import org.apache.tika.parser.Parser;
 import org.apache.tika.parser.RecursiveParserWrapper;
 import org.apache.tika.parser.html.BoilerpipeContentHandler;
-import org.apache.tika.parser.utils.CommonsDigester;
+import org.apache.tika.parser.digesting.CommonsDigester;
 import org.apache.tika.sax.BasicContentHandlerFactory;
 import org.apache.tika.sax.BodyContentHandler;
 import org.apache.tika.sax.ContentHandlerDecorator;

http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-app/src/test/java/org/apache/tika/config/TikaDetectorConfigTest.java
----------------------------------------------------------------------
diff --git a/tika-app/src/test/java/org/apache/tika/config/TikaDetectorConfigTest.java b/tika-app/src/test/java/org/apache/tika/config/TikaDetectorConfigTest.java
new file mode 100644
index 0000000..132475a
--- /dev/null
+++ b/tika-app/src/test/java/org/apache/tika/config/TikaDetectorConfigTest.java
@@ -0,0 +1,143 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.config;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+
+import org.apache.tika.detect.CompositeDetector;
+import org.apache.tika.detect.DefaultDetector;
+import org.apache.tika.detect.Detector;
+import org.apache.tika.detect.EmptyDetector;
+import org.apache.tika.io.TikaInputStream;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.parser.mbox.OutlookPSTParser;
+import org.apache.tika.parser.microsoft.POIFSContainerDetector;
+import org.apache.tika.parser.pkg.ZipContainerDetector;
+import org.junit.Test;
+
+/**
+ * JUnit test class for {@link TikaConfig}, which covers things
+ *  that {@link TikaConfigTest} can't do due to a need for the
+ *  full set of detectors
+ */
+public class TikaDetectorConfigTest extends AbstractTikaConfigTest {
+    @Test
+    public void testDetectorExcludeFromDefault() throws Exception {
+        TikaConfig config = getConfig("TIKA-1702-detector-blacklist.xml");
+        assertNotNull(config.getParser());
+        assertNotNull(config.getDetector());
+        CompositeDetector detector = (CompositeDetector)config.getDetector();
+        
+        // Should be wrapping two detectors
+        assertEquals(2, detector.getDetectors().size());
+
+        
+        // First should be DefaultDetector, second EmptyDetector, in that order
+        assertEquals(DefaultDetector.class, detector.getDetectors().get(0).getClass());
+        assertEquals(EmptyDetector.class,   detector.getDetectors().get(1).getClass());
+        
+        
+        // Get the DefaultDetector from the config
+        DefaultDetector confDetector = (DefaultDetector)detector.getDetectors().get(0);
+        
+        // Get a fresh "default" DefaultDetector
+        DefaultDetector normDetector = new DefaultDetector(config.getMimeRepository());
+        
+        
+        // The default one will offer the Zip and POIFS detectors
+        assertDetectors(normDetector, true, true);
+        
+        
+        // The one from the config won't, as we excluded those
+        assertDetectors(confDetector, false, false);
+    }
+    
+    /**
+     * TIKA-1708 - If the Zip detector is disabled, either explicitly,
+     *  or via giving a list of detectors that it isn't part of, ensure
+     *  that detection of PST files still works
+     */
+    @Test
+    public void testPSTDetectionWithoutZipDetector() throws Exception {
+        // Check the one with an exclude
+        TikaConfig configWX = getConfig("TIKA-1708-detector-default.xml");
+        assertNotNull(configWX.getParser());
+        assertNotNull(configWX.getDetector());
+        CompositeDetector detectorWX = (CompositeDetector)configWX.getDetector();
+
+        // Check it has the POIFS one, but not the zip one
+        assertDetectors(detectorWX, true, false);
+        
+        
+        // Check the one with an explicit list
+        TikaConfig configCL = getConfig("TIKA-1708-detector-composite.xml");
+        assertNotNull(configCL.getParser());
+        assertNotNull(configCL.getDetector());
+        CompositeDetector detectorCL = (CompositeDetector)configCL.getDetector();
+        assertEquals(2, detectorCL.getDetectors().size());
+        
+        // Check it also has the POIFS one, but not the zip one
+        assertDetectors(detectorCL, true, false);
+        
+        
+        // Check that both configs have a media type registry with entries
+        assertTrue("Not enough mime types: " + configWX.getMediaTypeRegistry().getTypes().size(),
+                   configWX.getMediaTypeRegistry().getTypes().size() > 100);
+        assertTrue("Not enough mime types: " + configCL.getMediaTypeRegistry().getTypes().size(),
+                   configCL.getMediaTypeRegistry().getTypes().size() > 100);
+        
+        
+        // Now check they detect PST files correctly
+        TikaInputStream stream = TikaInputStream.cast(
+                getTestDocumentAsStream("testPST.pst"));
+        assertEquals(
+                OutlookPSTParser.MS_OUTLOOK_PST_MIMETYPE, 
+                detectorWX.detect(stream, new Metadata())
+        );
+        assertEquals(
+                OutlookPSTParser.MS_OUTLOOK_PST_MIMETYPE, 
+                detectorCL.detect(stream, new Metadata())
+        );
+    }
+    
+    private void assertDetectors(CompositeDetector detector, boolean shouldHavePOIFS,
+                                 boolean shouldHaveZip) {
+        boolean hasZip = false;
+        boolean hasPOIFS = false;
+        for (Detector d : detector.getDetectors()) {
+            if (d instanceof ZipContainerDetector) {
+                if (shouldHaveZip) {
+                    hasZip = true;
+                } else {
+                    fail("Shouldn't have the ZipContainerDetector from config");
+                }
+            }
+            if (d instanceof POIFSContainerDetector) {
+                if (shouldHavePOIFS) {
+                    hasPOIFS = true;
+                } else {
+                    fail("Shouldn't have the POIFSContainerDetector from config");
+                }
+            }
+        }
+        if (shouldHavePOIFS) assertTrue("Should have the POIFSContainerDetector", hasPOIFS);
+        if (shouldHaveZip)   assertTrue("Should have the ZipContainerDetector", hasZip);
+    }
+}

http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-app/src/test/java/org/apache/tika/config/TikaParserConfigTest.java
----------------------------------------------------------------------
diff --git a/tika-app/src/test/java/org/apache/tika/config/TikaParserConfigTest.java b/tika-app/src/test/java/org/apache/tika/config/TikaParserConfigTest.java
new file mode 100644
index 0000000..817beb4
--- /dev/null
+++ b/tika-app/src/test/java/org/apache/tika/config/TikaParserConfigTest.java
@@ -0,0 +1,155 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.config;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+
+import java.util.List;
+
+import org.apache.tika.mime.MediaType;
+import org.apache.tika.parser.CompositeParser;
+import org.apache.tika.parser.DefaultParser;
+import org.apache.tika.parser.EmptyParser;
+import org.apache.tika.parser.Parser;
+import org.apache.tika.parser.ParserDecorator;
+import org.apache.tika.parser.executable.ExecutableParser;
+import org.apache.tika.parser.xml.XMLParser;
+import org.junit.Test;
+
+/**
+ * JUnit test class for {@link TikaConfig}, which covers things
+ *  that {@link TikaConfigTest} can't do due to a need for the
+ *  full set of parsers
+ */
+public class TikaParserConfigTest extends AbstractTikaConfigTest {
+    @Test
+    public void testMimeExcludeInclude() throws Exception {
+        TikaConfig config = getConfig("TIKA-1558-blacklist.xml");
+        assertNotNull(config.getParser());
+        assertNotNull(config.getDetector());
+        Parser parser = config.getParser();
+        
+        MediaType PDF = MediaType.application("pdf");
+        MediaType JPEG = MediaType.image("jpeg");
+        
+        
+        // Has two parsers
+        assertEquals(CompositeParser.class, parser.getClass());
+        CompositeParser cParser = (CompositeParser)parser;
+        assertEquals(2, cParser.getAllComponentParsers().size());
+        
+        // Both are decorated
+        assertTrue(cParser.getAllComponentParsers().get(0) instanceof ParserDecorator);
+        assertTrue(cParser.getAllComponentParsers().get(1) instanceof ParserDecorator);
+        ParserDecorator p0 = (ParserDecorator)cParser.getAllComponentParsers().get(0);
+        ParserDecorator p1 = (ParserDecorator)cParser.getAllComponentParsers().get(1);
+        
+        
+        // DefaultParser will be wrapped with excludes
+        assertEquals(DefaultParser.class, p0.getWrappedParser().getClass());
+        
+        assertNotContained(PDF, p0.getSupportedTypes(context));
+        assertContains(PDF, p0.getWrappedParser().getSupportedTypes(context));
+        assertNotContained(JPEG, p0.getSupportedTypes(context));
+        assertContains(JPEG, p0.getWrappedParser().getSupportedTypes(context));
+        
+        
+        // Will have an empty parser for PDF
+        assertEquals(EmptyParser.class, p1.getWrappedParser().getClass());
+        assertEquals(1, p1.getSupportedTypes(context).size());
+        assertContains(PDF, p1.getSupportedTypes(context));
+        assertNotContained(PDF, p1.getWrappedParser().getSupportedTypes(context));
+    }
+    
+    @Test
+    public void testParserExcludeFromDefault() throws Exception {
+        TikaConfig config = getConfig("TIKA-1558-blacklist.xml");
+        assertNotNull(config.getParser());
+        assertNotNull(config.getDetector());
+        CompositeParser parser = (CompositeParser)config.getParser();
+        
+        MediaType PE_EXE = MediaType.application("x-msdownload");
+        MediaType ELF = MediaType.application("x-elf");
+        
+        
+        // Get the DefaultParser from the config
+        ParserDecorator confWrappedParser = (ParserDecorator)parser.getParsers().get(MediaType.APPLICATION_XML);
+        assertNotNull(confWrappedParser);
+        DefaultParser confParser = (DefaultParser)confWrappedParser.getWrappedParser();
+        
+        // Get a fresh "default" DefaultParser
+        DefaultParser normParser = new DefaultParser(config.getMediaTypeRegistry());
+        
+        
+        // The default one will offer the Executable Parser
+        assertContains(PE_EXE, normParser.getSupportedTypes(context));
+        assertContains(ELF, normParser.getSupportedTypes(context));
+        
+        boolean hasExec = false;
+        for (Parser p : normParser.getParsers().values()) {
+            if (p instanceof ExecutableParser) {
+                hasExec = true;
+                break;
+            }
+        }
+        assertTrue(hasExec);
+        
+        
+        // The one from the config won't
+        assertNotContained(PE_EXE, confParser.getSupportedTypes(context));
+        assertNotContained(ELF, confParser.getSupportedTypes(context));
+        
+        for (Parser p : confParser.getParsers().values()) {
+            if (p instanceof ExecutableParser)
+                fail("Shouldn't have the Executable Parser from config");
+        }
+    }
+    /**
+     * TIKA-1558 It should be possible to exclude Parsers from being picked up by
+     * DefaultParser.
+     */
+    @Test
+    public void defaultParserBlacklist() throws Exception {
+        TikaConfig config = new TikaConfig();
+        assertNotNull(config.getParser());
+        assertNotNull(config.getDetector());
+        CompositeParser cp = (CompositeParser) config.getParser();
+        List<Parser> parsers = cp.getAllComponentParsers();
+
+        boolean hasXML = false;
+        for (Parser p : parsers) {
+            if (p instanceof XMLParser) {
+                hasXML = true;
+                break;
+            }
+        }
+        assertTrue("Default config should include an XMLParser.", hasXML);
+
+        // This custom TikaConfig should exclude XMLParser and all of its subclasses.
+        config = getConfig("TIKA-1558-blacklistsub.xml");
+        cp = (CompositeParser) config.getParser();
+        parsers = cp.getAllComponentParsers();
+
+        for (Parser p : parsers) {
+            if (p instanceof XMLParser)
+                fail("Custom config should not include an XMLParser (" + p.getClass() + ").");
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-app/src/test/java/org/apache/tika/config/TikaTranslatorConfigTest.java
----------------------------------------------------------------------
diff --git a/tika-app/src/test/java/org/apache/tika/config/TikaTranslatorConfigTest.java b/tika-app/src/test/java/org/apache/tika/config/TikaTranslatorConfigTest.java
new file mode 100644
index 0000000..764bbe4
--- /dev/null
+++ b/tika-app/src/test/java/org/apache/tika/config/TikaTranslatorConfigTest.java
@@ -0,0 +1,73 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.config;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+
+import org.apache.tika.config.TikaConfig;
+import org.apache.tika.language.translate.DefaultTranslator;
+import org.apache.tika.language.translate.EmptyTranslator;
+import org.junit.Test;
+
+/**
+ * JUnit test class for {@link TikaConfig}, which covers things
+ *  that {@link TikaConfigTest} can't do due to a need for the
+ *  full set of translators
+ */
+public class TikaTranslatorConfigTest extends AbstractTikaConfigTest {
+    @Test
+    public void testDefaultBehaviour() throws Exception {
+        TikaConfig config = TikaConfig.getDefaultConfig();
+        assertNotNull(config.getTranslator());
+        assertEquals(DefaultTranslator.class, config.getTranslator().getClass());
+    }
+    
+    @Test
+    public void testRequestsDefault() throws Exception {
+        TikaConfig config = getConfig("TIKA-1702-translator-default.xml");
+        assertNotNull(config.getParser());
+        assertNotNull(config.getDetector());
+        assertNotNull(config.getTranslator());
+        
+        assertEquals(DefaultTranslator.class, config.getTranslator().getClass());
+    }
+    
+    @Test
+    public void testRequestsEmpty() throws Exception {
+        TikaConfig config = getConfig("TIKA-1702-translator-empty.xml");
+        assertNotNull(config.getParser());
+        assertNotNull(config.getDetector());
+        assertNotNull(config.getTranslator());
+        
+        assertEquals(EmptyTranslator.class, config.getTranslator().getClass());
+    }
+    
+    /**
+     * Currently, Translators don't support Composites, so
+     *  if multiple translators are given, only the first wins
+     */
+    @Test
+    public void testRequestsMultiple() throws Exception {
+        TikaConfig config = getConfig("TIKA-1702-translator-empty-default.xml");
+        assertNotNull(config.getParser());
+        assertNotNull(config.getDetector());
+        assertNotNull(config.getTranslator());
+        
+        assertEquals(EmptyTranslator.class, config.getTranslator().getClass());
+    }
+}

http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-app/src/test/java/org/apache/tika/detect/TestContainerAwareDetector.java
----------------------------------------------------------------------
diff --git a/tika-app/src/test/java/org/apache/tika/detect/TestContainerAwareDetector.java b/tika-app/src/test/java/org/apache/tika/detect/TestContainerAwareDetector.java
new file mode 100644
index 0000000..5787408
--- /dev/null
+++ b/tika-app/src/test/java/org/apache/tika/detect/TestContainerAwareDetector.java
@@ -0,0 +1,410 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.detect;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertTrue;
+
+import java.io.File;
+import java.io.FilenameFilter;
+import java.io.IOException;
+import java.io.InputStream;
+
+import org.apache.poi.poifs.filesystem.NPOIFSFileSystem;
+import org.apache.tika.config.TikaConfig;
+import org.apache.tika.io.TikaInputStream;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.mime.MediaType;
+import org.apache.tika.mime.MimeTypes;
+import org.junit.Test;
+
+/**
+ * Junit test class for {@link ContainerAwareDetector}
+ */
+public class TestContainerAwareDetector {
+    private final TikaConfig tikaConfig = TikaConfig.getDefaultConfig();
+    private final MimeTypes mimeTypes = tikaConfig.getMimeRepository();
+    private final Detector detector = new DefaultDetector(mimeTypes);
+
+    private void assertTypeByData(String file, String type) throws Exception {
+       assertTypeByNameAndData(file, null, type);
+    }
+    private void assertTypeByNameAndData(String file, String type) throws Exception {
+       assertTypeByNameAndData(file, file, type);
+    }
+    private void assertType(String file, String byData, String byNameAndData) throws Exception {
+       assertTypeByData(file, byData);
+       assertTypeByNameAndData(file, byNameAndData);
+    }
+    private void assertTypeByNameAndData(String dataFile, String name, String type) throws Exception {
+        assertTypeByNameAndData(dataFile, name, type, null);
+    }
+    private void assertTypeByNameAndData(String dataFile, String name, String typeFromDetector, String typeFromMagic) throws Exception {
+        try (TikaInputStream stream = TikaInputStream.get(
+                TestContainerAwareDetector.class.getResource("/test-documents/" + dataFile))) {
+            Metadata m = new Metadata();
+            if (name != null)
+                m.add(Metadata.RESOURCE_NAME_KEY, name);
+
+            // Mime Magic version is likely to be less precise
+            if (typeFromMagic != null) {
+                assertEquals(
+                        MediaType.parse(typeFromMagic),
+                        mimeTypes.detect(stream, m));
+            }
+
+            // All being well, the detector should get it perfect
+            assertEquals(
+                    MediaType.parse(typeFromDetector),
+                    detector.detect(stream, m));
+        }
+    }
+
+    @Test
+    public void testDetectOLE2() throws Exception {
+        // Microsoft office types known by POI
+        assertTypeByData("testEXCEL.xls", "application/vnd.ms-excel");
+        assertTypeByData("testWORD.doc", "application/msword");
+        assertTypeByData("testPPT.ppt", "application/vnd.ms-powerpoint");
+        
+        assertTypeByData("test-outlook.msg", "application/vnd.ms-outlook");
+        assertTypeByData("test-outlook2003.msg", "application/vnd.ms-outlook");
+        assertTypeByData("testVISIO.vsd", "application/vnd.visio");
+        assertTypeByData("testPUBLISHER.pub", "application/x-mspublisher");
+        assertTypeByData("testWORKS.wps", "application/vnd.ms-works");
+        assertTypeByData("testWORKS2000.wps", "application/vnd.ms-works");
+        
+        // older Works Word Processor files can't be recognized
+        // they were created with Works Word Processor 7.0 (hence the text inside)
+        // and exported to the older formats with the "Save As" feature
+        assertTypeByData("testWORKSWordProcessor3.0.wps","application/vnd.ms-works");
+        assertTypeByData("testWORKSWordProcessor4.0.wps","application/vnd.ms-works");
+        assertTypeByData("testWORKSSpreadsheet7.0.xlr", "application/x-tika-msworks-spreadsheet");
+        assertTypeByData("testPROJECT2003.mpp", "application/vnd.ms-project");
+        assertTypeByData("testPROJECT2007.mpp", "application/vnd.ms-project");
+        
+        // Excel95 can be detected but not parsed
+        assertTypeByData("testEXCEL_95.xls", "application/vnd.ms-excel");
+
+        // Try some ones that POI doesn't handle, that are still OLE2 based
+        assertTypeByData("testCOREL.shw", "application/x-corelpresentations");
+        assertTypeByData("testQUATTRO.qpw", "application/x-quattro-pro");
+        assertTypeByData("testQUATTRO.wb3", "application/x-quattro-pro");
+        
+        assertTypeByData("testHWP_5.0.hwp", "application/x-hwp-v5");
+        
+        
+        // With the filename and data
+        assertTypeByNameAndData("testEXCEL.xls", "application/vnd.ms-excel");
+        assertTypeByNameAndData("testWORD.doc", "application/msword");
+        assertTypeByNameAndData("testPPT.ppt", "application/vnd.ms-powerpoint");
+        
+        // With the wrong filename supplied, data will trump filename
+        assertTypeByNameAndData("testEXCEL.xls", "notWord.doc",  "application/vnd.ms-excel");
+        assertTypeByNameAndData("testWORD.doc",  "notExcel.xls", "application/msword");
+        assertTypeByNameAndData("testPPT.ppt",   "notWord.doc",  "application/vnd.ms-powerpoint");
+        
+        // With a filename of a totally different type, data will trump filename
+        assertTypeByNameAndData("testEXCEL.xls", "notPDF.pdf",  "application/vnd.ms-excel");
+        assertTypeByNameAndData("testEXCEL.xls", "notPNG.png",  "application/vnd.ms-excel");
+    }
+    
+    /**
+     * There is no way to distinguish "proper" StarOffice files from templates.
+     * All templates have the same extension but their actual type depends on
+     * the magic. Our current MimeTypes class doesn't allow us to use the same
+     * glob pattern in more than one mimetype.
+     * 
+     * @throws Exception
+     */
+    @Test
+    public void testDetectStarOfficeFiles() throws Exception {
+        assertType("testStarOffice-5.2-calc.sdc",
+                "application/vnd.stardivision.calc",
+                "application/vnd.stardivision.calc");
+        assertType("testVORCalcTemplate.vor",
+                "application/vnd.stardivision.calc",
+                "application/vnd.stardivision.calc");
+        assertType("testStarOffice-5.2-draw.sda",
+                "application/vnd.stardivision.draw",
+                "application/vnd.stardivision.draw");
+        assertType("testVORDrawTemplate.vor",
+                "application/vnd.stardivision.draw",
+                "application/vnd.stardivision.draw");
+        assertType("testStarOffice-5.2-impress.sdd",
+                "application/vnd.stardivision.impress",
+                "application/vnd.stardivision.impress");
+        assertType("testVORImpressTemplate.vor",
+                "application/vnd.stardivision.impress",
+                "application/vnd.stardivision.impress");
+        assertType("testStarOffice-5.2-writer.sdw",
+                "application/vnd.stardivision.writer",
+                "application/vnd.stardivision.writer");
+        assertType("testVORWriterTemplate.vor",
+                "application/vnd.stardivision.writer",
+                "application/vnd.stardivision.writer");
+
+    }
+
+    @Test
+    public void testOpenContainer() throws Exception {
+        try (TikaInputStream stream = TikaInputStream.get(
+                TestContainerAwareDetector.class.getResource("/test-documents/testPPT.ppt"))) {
+            assertNull(stream.getOpenContainer());
+            assertEquals(
+                    MediaType.parse("application/vnd.ms-powerpoint"),
+                    detector.detect(stream, new Metadata()));
+            assertTrue(stream.getOpenContainer() instanceof NPOIFSFileSystem);
+        }
+    }
+
+    /**
+     * EPub uses a similar mimetype entry to OpenDocument for storing
+     *  the mimetype within the parent zip file
+     */
+    @Test
+    public void testDetectEPub() throws Exception {
+       assertTypeByData("testEPUB.epub", "application/epub+zip");
+       assertTypeByData("testiBooks.ibooks", "application/x-ibooks+zip");
+    }
+    
+    @Test
+    public void testDetectLotusNotesEml() throws Exception {
+        // Lotus .eml files aren't guaranteed to have any of the magic 
+        // matches as the first line, but should have X-Notes-Item and Message-ID
+        assertTypeByData("testLotusEml.eml", "message/rfc822");
+     }
+
+    @Test
+    public void testDetectODF() throws Exception {
+        assertTypeByData("testODFwithOOo3.odt", "application/vnd.oasis.opendocument.text");
+        assertTypeByData("testOpenOffice2.odf", "application/vnd.oasis.opendocument.formula");
+    }
+
+    @Test
+    public void testDetectOOXML() throws Exception {
+        assertTypeByData("testEXCEL.xlsx", "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet");
+        assertTypeByData("testWORD.docx", "application/vnd.openxmlformats-officedocument.wordprocessingml.document");
+        assertTypeByData("testPPT.pptx", "application/vnd.openxmlformats-officedocument.presentationml.presentation");
+
+        // Check some of the less common OOXML types
+        assertTypeByData("testPPT.pptm", "application/vnd.ms-powerpoint.presentation.macroenabled.12");
+        assertTypeByData("testPPT.ppsx", "application/vnd.openxmlformats-officedocument.presentationml.slideshow");
+        assertTypeByData("testPPT.ppsm", "application/vnd.ms-powerpoint.slideshow.macroEnabled.12");
+        assertTypeByData("testDOTM.dotm", "application/vnd.ms-word.template.macroEnabled.12");
+        assertTypeByData("testEXCEL.strict.xlsx", "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet");
+        assertTypeByData("testPPT.xps", "application/vnd.ms-xpsdocument");
+
+        assertTypeByData("testVISIO.vsdm", "application/vnd.ms-visio.drawing.macroenabled.12");
+        assertTypeByData("testVISIO.vsdx", "application/vnd.ms-visio.drawing");
+        assertTypeByData("testVISIO.vssm", "application/vnd.ms-visio.stencil.macroenabled.12");
+        assertTypeByData("testVISIO.vssx", "application/vnd.ms-visio.stencil");
+        assertTypeByData("testVISIO.vstm", "application/vnd.ms-visio.template.macroenabled.12");
+        assertTypeByData("testVISIO.vstx", "application/vnd.ms-visio.template");
+        
+        // .xlsb is an OOXML file containing the binary parts, and not
+        //  an OLE2 file as you might initially expect!
+        assertTypeByData("testEXCEL.xlsb", "application/vnd.ms-excel.sheet.binary.macroEnabled.12");
+
+        // With the filename and data
+        assertTypeByNameAndData("testEXCEL.xlsx", "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet");
+        assertTypeByNameAndData("testWORD.docx", "application/vnd.openxmlformats-officedocument.wordprocessingml.document");
+        assertTypeByNameAndData("testPPT.pptx", "application/vnd.openxmlformats-officedocument.presentationml.presentation");
+        
+        // With the wrong filename supplied, data will trump filename
+        assertTypeByNameAndData("testEXCEL.xlsx", "notWord.docx", "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet");
+        assertTypeByNameAndData("testWORD.docx",  "notExcel.xlsx", "application/vnd.openxmlformats-officedocument.wordprocessingml.document");
+        assertTypeByNameAndData("testPPT.pptx",   "notWord.docx", "application/vnd.openxmlformats-officedocument.presentationml.presentation");
+        
+        // With an incorrect filename of a different container type, data trumps filename
+        assertTypeByNameAndData("testEXCEL.xlsx", "notOldExcel.xls", "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet");
+    }
+    
+    /**
+     * Password Protected OLE2 files are fairly straightforward to detect, as they
+     *  have the same structure as regular OLE2 files. (Core streams may be encrypted
+     *  however)
+     */
+    @Test
+    public void testDetectProtectedOLE2() throws Exception {
+        assertTypeByData("testEXCEL_protected_passtika.xls", "application/vnd.ms-excel");
+        assertTypeByData("testWORD_protected_passtika.doc", "application/msword");
+        assertTypeByData("testPPT_protected_passtika.ppt", "application/vnd.ms-powerpoint");
+        assertTypeByNameAndData("testEXCEL_protected_passtika.xls", "application/vnd.ms-excel");
+        assertTypeByNameAndData("testWORD_protected_passtika.doc", "application/msword");
+        assertTypeByNameAndData("testPPT_protected_passtika.ppt", "application/vnd.ms-powerpoint");
+    }
+
+    /**
+     * Password Protected OOXML files are much more tricky beasts to work with.
+     * They have a very different structure to regular OOXML files, and instead
+     *  of being ZIP based they are actually an OLE2 file which contains the
+     *  OOXML structure within an encrypted stream.
+     * This makes detecting them much harder...
+     */
+    @Test
+    public void testDetectProtectedOOXML() throws Exception {
+        // Encrypted Microsoft Office OOXML files have OLE magic but
+        //  special streams, so we can tell they're Protected OOXML
+        assertTypeByData("testEXCEL_protected_passtika.xlsx", 
+                "application/x-tika-ooxml-protected");
+        assertTypeByData("testWORD_protected_passtika.docx", 
+                "application/x-tika-ooxml-protected");
+        assertTypeByData("testPPT_protected_passtika.pptx", 
+                "application/x-tika-ooxml-protected");
+        
+        // At the moment, we can't use the name to specialise
+        // See discussions on TIKA-790 for details
+        assertTypeByNameAndData("testEXCEL_protected_passtika.xlsx", 
+                "application/x-tika-ooxml-protected");
+        assertTypeByNameAndData("testWORD_protected_passtika.docx", 
+                "application/x-tika-ooxml-protected");
+        assertTypeByNameAndData("testPPT_protected_passtika.pptx", 
+                "application/x-tika-ooxml-protected");
+    }
+
+    /**
+     * Check that temporary files created by Tika are removed after
+     * closing TikaInputStream.
+     */
+    @Test
+    public void testRemovalTempfiles() throws Exception {
+        assertRemovalTempfiles("testWORD.docx");
+        assertRemovalTempfiles("test-documents.zip");
+    }
+
+    private int countTemporaryFiles() {
+        // Only entries in java.io.tmpdir carrying the "apache-tika-" prefix are counted
+        return new File(System.getProperty("java.io.tmpdir")).list(new FilenameFilter() {
+            public boolean accept(File parent, String entryName) {
+                return entryName.startsWith("apache-tika-");
+            }
+        }).length;
+    }
+
+    private void assertRemovalTempfiles(String fileName) throws Exception {
+        int numberOfTempFiles = countTemporaryFiles();
+
+        try (TikaInputStream stream = TikaInputStream.get(
+                TestContainerAwareDetector.class.getResource("/test-documents/" + fileName))) {
+            detector.detect(stream, new Metadata());
+        }
+
+        assertEquals(numberOfTempFiles, countTemporaryFiles());
+    }
+
+    @Test
+    public void testDetectIWork() throws Exception {
+        assertTypeByData("testKeynote.key", "application/vnd.apple.keynote");
+        assertTypeByData("testNumbers.numbers", "application/vnd.apple.numbers");
+        assertTypeByData("testPages.pages", "application/vnd.apple.pages");
+    }
+
+    @Test
+    public void testDetectKMZ() throws Exception {
+       assertTypeByData("testKMZ.kmz", "application/vnd.google-earth.kmz");
+    }
+    
+    @Test
+    public void testDetectIPA() throws Exception {
+        assertTypeByNameAndData("testIPA.ipa", "application/x-itunes-ipa");
+        assertTypeByData("testIPA.ipa", "application/x-itunes-ipa");
+    }
+    
+    @Test
+    public void testASiC() throws Exception {
+        assertTypeByData("testASiCE.asice", "application/vnd.etsi.asic-e+zip");
+        assertTypeByData("testASiCS.asics", "application/vnd.etsi.asic-s+zip");
+        assertTypeByNameAndData("testASiCE.asice", "application/vnd.etsi.asic-e+zip");
+        assertTypeByNameAndData("testASiCS.asics", "application/vnd.etsi.asic-s+zip");
+    }
+     
+    @Test
+    public void testDetectZip() throws Exception {
+        assertTypeByData("test-documents.zip", "application/zip");
+        assertTypeByData("test-zip-of-zip.zip", "application/zip");
+        
+        // JAR based formats
+        assertTypeByData("testJAR.jar", "application/java-archive");
+        assertTypeByData("testWAR.war", "application/x-tika-java-web-archive");
+        assertTypeByData("testEAR.ear", "application/x-tika-java-enterprise-archive");
+        assertTypeByData("testAPK.apk", "application/vnd.android.package-archive");
+        
+        // JAR with HTML files in it
+        assertTypeByNameAndData("testJAR_with_HTML.jar", "testJAR_with_HTML.jar",
+                                "application/java-archive", "application/java-archive");
+    }
+
+    private TikaInputStream getTruncatedFile(String name, int n) throws IOException {
+        try (InputStream input = TestContainerAwareDetector.class.getResourceAsStream("/test-documents/" + name)) {
+            if (input == null) {
+                throw new IOException("Test document not found: " + name);
+            }
+            byte[] bytes = new byte[n]; // only the first n bytes are handed back
+            int filled = 0;
+            while (filled < n) {
+                int read = input.read(bytes, filled, n - filled);
+                if (read == -1) {
+                    throw new IOException("Unexpected end of stream");
+                }
+                filled += read;
+            }
+            return TikaInputStream.get(bytes);
+        }
+    }
+
+    @Test
+    public void testTruncatedFiles() throws Exception {
+        // First up a truncated OOXML (zip) file
+       
+        // With only the data supplied, the best we can do is the container
+        Metadata m = new Metadata();
+        try (TikaInputStream xlsx = getTruncatedFile("testEXCEL.xlsx", 300)) {
+            assertEquals(
+                    MediaType.application("x-tika-ooxml"),
+                    detector.detect(xlsx, m));
+        }
+        
+        // With truncated data + filename, we can use the filename to specialise
+        m = new Metadata();
+        m.add(Metadata.RESOURCE_NAME_KEY, "testEXCEL.xlsx");
+        try (TikaInputStream xlsx = getTruncatedFile("testEXCEL.xlsx", 300)) {
+            assertEquals(
+                    MediaType.application("vnd.openxmlformats-officedocument.spreadsheetml.sheet"),
+                    detector.detect(xlsx, m));
+        }
+
+        // Now a truncated OLE2 file 
+        m = new Metadata();
+        try (TikaInputStream xls = getTruncatedFile("testEXCEL.xls", 400)) {
+            assertEquals(
+                    MediaType.application("x-tika-msoffice"),
+                    detector.detect(xls, m));
+        }
+        
+        // Finally a truncated OLE2 file, with a filename available
+        m = new Metadata();
+        m.add(Metadata.RESOURCE_NAME_KEY, "testEXCEL.xls");
+        try (TikaInputStream xls = getTruncatedFile("testEXCEL.xls", 400)) {
+            assertEquals(
+                    MediaType.application("vnd.ms-excel"),
+                    detector.detect(xls, m));
+        }
+   }
+
+}

http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-app/src/test/java/org/apache/tika/embedder/ExternalEmbedderTest.java
----------------------------------------------------------------------
diff --git a/tika-app/src/test/java/org/apache/tika/embedder/ExternalEmbedderTest.java b/tika-app/src/test/java/org/apache/tika/embedder/ExternalEmbedderTest.java
new file mode 100644
index 0000000..45f68cc
--- /dev/null
+++ b/tika-app/src/test/java/org/apache/tika/embedder/ExternalEmbedderTest.java
@@ -0,0 +1,285 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.embedder;
+
+import static java.nio.charset.StandardCharsets.UTF_8;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+
+import java.io.ByteArrayOutputStream;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStreamWriter;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.StandardCopyOption;
+import java.text.DateFormat;
+import java.text.SimpleDateFormat;
+import java.util.Date;
+import java.util.HashMap;
+import java.util.Locale;
+import java.util.Map;
+
+import org.apache.tika.TikaTest;
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.io.TemporaryResources;
+import org.apache.tika.io.TikaInputStream;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.Property;
+import org.apache.tika.metadata.TikaCoreProperties;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.parser.Parser;
+import org.apache.tika.parser.txt.TXTParser;
+import org.apache.tika.sax.BodyContentHandler;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.junit.Test;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.SAXException;
+
+/**
+ * Unit test for {@link ExternalEmbedder}s.
+ */
+public class ExternalEmbedderTest extends TikaTest {
+
+    static Path TMP_TEST_TXT;
+    protected static final DateFormat EXPECTED_METADATA_DATE_FORMATTER =
+            new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss", Locale.ROOT);
+    protected static final String DEFAULT_CHARSET = UTF_8.name();
+    private static final String COMMAND_METADATA_ARGUMENT_DESCRIPTION = "dc:description";
+    private static final String TEST_TXT_PATH = "test-documents/testTXT.txt";
+
+    private TemporaryResources tmp = new TemporaryResources();
+
+    @BeforeClass
+    public static void copyTestFile() throws Exception {
+        TMP_TEST_TXT = Files.createTempFile("tika-test", "");
+        Files.copy(TikaTest.class.getClassLoader().getResourceAsStream(TEST_TXT_PATH),
+                TMP_TEST_TXT, StandardCopyOption.REPLACE_EXISTING);
+    }
+
+    @AfterClass
+    public static void rmTestFile() throws Exception {
+        Files.delete(TMP_TEST_TXT);
+    }
+
+    /**
+     * Gets the expected returned metadata value for the given field
+     *
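+     * @param fieldName the metadata field name; {@code timestamp} is formatted into the value
+     * @return the test class name, the field name and the formatted timestamp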
+     */
+    protected String getExpectedMetadataValueString(String fieldName, Date timestamp) {
+        return this.getClass().getSimpleName() + " embedded " + fieldName +
+                " on " + EXPECTED_METADATA_DATE_FORMATTER.format(timestamp);
+    }
+
+    /**
+     * Gets the tika <code>Metadata</code> object containing data to be
+     * embedded.
+     *
+     * @return the populated tika metadata object
+     */
+    protected Metadata getMetadataToEmbed(Date timestamp) {
+        Metadata metadata = new Metadata();
+        metadata.add(TikaCoreProperties.DESCRIPTION,
+                getExpectedMetadataValueString(TikaCoreProperties.DESCRIPTION.toString(), timestamp));
+        return metadata;
+    }
+
+    /**
+     * Gets the <code>Embedder</code> to test.
+     *
+     * @return the embedder under test
+     */
+    protected Embedder getEmbedder() {
+        ExternalEmbedder embedder = new ExternalEmbedder();
+        Map<Property, String[]> metadataCommandArguments = new HashMap<Property, String[]>(1);
+        metadataCommandArguments.put(TikaCoreProperties.DESCRIPTION,
+                new String[] { COMMAND_METADATA_ARGUMENT_DESCRIPTION });
+        embedder.setMetadataCommandArguments(metadataCommandArguments);
+        return embedder;
+    }
+
+    /**
+     * Gets the source input stream through standard Java resource loaders 
+     * before metadata has been embedded.
+     *
+     * @return a fresh input stream
+     */
+    protected InputStream getSourceStandardInputStream() {
+        return this.getClass().getResourceAsStream(TEST_TXT_PATH);
+    }
+
+    /**
+     * Gets the source input stream via {@link TikaInputStream}
+     * before metadata has been embedded.
+     *
+     * @return a fresh input stream
+     * @throws IOException if the stream over the temporary test file cannot be created
+     */
+    protected InputStream getSourceTikaInputStream() throws IOException {
+        return TikaInputStream.get(TMP_TEST_TXT);
+    }
+
+    /**
+     * Gets the parser to use to verify the result of the embed operation.
+     *
+     * @return the parser to read embedded metadata
+     */
+    protected Parser getParser() {
+        return new TXTParser();
+    }
+
+    /**
+     * Whether or not the final result of reading the now embedded metadata is
+     * expected in the output of the external tool
+     *
+     * @return whether or not results are expected in command line output
+     */
+    protected boolean getIsMetadataExpectedInOutput() {
+        return true;
+    }
+
+    /**
+     * Tests embedding metadata then reading metadata to verify the results.
+     *
+     * @param sourceInputStream the document to embed metadata into; this method does not close it
+     * @param isResultExpectedInOutput whether or not results are expected in command line output
+     */
+    protected void embedInTempFile(InputStream sourceInputStream, boolean isResultExpectedInOutput) {
+        Embedder embedder = getEmbedder();
+
+        // TODO Move this check to ExternalEmbedder
+        String os = System.getProperty("os.name", "");
+        if (os.contains("Windows")) {
+            // Skip test on Windows
+            return;
+        }
+
+        Date timestamp = new Date();
+        Metadata metadataToEmbed = getMetadataToEmbed(timestamp);
+
+        try {
+            File tempOutputFile = tmp.createTemporaryFile();
+
+            // Embed the metadata into a copy of the original; the output stream
+            // is closed before the file is read back
+            try (FileOutputStream tempFileOutputStream = new FileOutputStream(tempOutputFile)) {
+                embedder.embed(metadataToEmbed, sourceInputStream, tempFileOutputStream, null);
+            }
+
+            ParseContext context = new ParseContext();
+            Parser parser = getParser();
+            context.set(Parser.class, parser);
+
+            // Setup the extracting content handler
+            ByteArrayOutputStream result = new ByteArrayOutputStream();
+            OutputStreamWriter outputWriter = new OutputStreamWriter(result,DEFAULT_CHARSET);
+            ContentHandler handler = new BodyContentHandler(outputWriter);
+
+            // Create a new metadata object to read the new metadata into
+            Metadata embeddedMetadata = new Metadata();
+
+            // Re-read the now embedded temp file, closing the stream afterwards
+            try (FileInputStream embeddedFileInputStream = new FileInputStream(tempOutputFile)) {
+                parser.parse(embeddedFileInputStream, handler, embeddedMetadata, context);
+            }
+
+            // The temporary file is no longer needed once it has been parsed
+            tmp.dispose();
+
+            String outputString = null;
+            if (isResultExpectedInOutput) {
+                outputString = result.toString(DEFAULT_CHARSET);
+            } else {
+                assertTrue("no metadata found", embeddedMetadata.size() > 0);
+            }
+
+            // Check each metadata property for the expected value
+            for (String metadataName : metadataToEmbed.names()) {
+                if (metadataToEmbed.get(metadataName) != null) {
+                    String expectedValue = metadataToEmbed.get(metadataName);
+                    boolean foundExpectedValue = false;
+                    if (isResultExpectedInOutput) {
+                        // just check that the entire output contains the expected string
+                        foundExpectedValue = outputString.contains(expectedValue);
+                    } else {
+                        if (embeddedMetadata.isMultiValued(metadataName)) {
+                            for (String embeddedValue : embeddedMetadata.getValues(metadataName)) {
+                                if (embeddedValue != null) {
+                                    if (embeddedValue.contains(expectedValue)) {
+                                        foundExpectedValue = true;
+                                        break;
+                                    }
+                                }
+                            }
+                        } else {
+                            String embeddedValue = embeddedMetadata.get(metadataName);
+                            assertNotNull("expected metadata for "
+                                    + metadataName + " not found",
+                                    embeddedValue);
+                            foundExpectedValue = embeddedValue.contains(expectedValue);
+                        }
+                    }
+                    assertTrue(
+                            "result did not contain expected appended metadata "
+                                    + metadataName + "="
+                                    + expectedValue,
+                            foundExpectedValue);
+                }
+            }
+        } catch (IOException | TikaException | SAXException e) {
+            // Keep the original exception as the cause so its stack trace is reported
+            throw new AssertionError(e.getMessage(), e);
+        }
+    }
+    
+    protected void checkSourceFileExists() {
+        String message = "the original input file was deleted";
+        assertNotNull(message, TMP_TEST_TXT);
+        assertTrue(message, Files.isRegularFile(TMP_TEST_TXT));
+    }
+
+    /**
+     * Tests embedding using an input stream obtained via {@link ExternalEmbedderTest#getSourceStandardInputStream()}
+     * 
+     * @throws IOException
+     */
+    @Test
+    public void testEmbedStandardInputStream() throws IOException {
+        embedInTempFile(getSourceStandardInputStream(), getIsMetadataExpectedInOutput());
+        checkSourceFileExists();
+    }
+    
+    /**
+     * Tests embedding using an input stream obtained via {@link ExternalEmbedderTest#getSourceTikaInputStream()}
+     * 
+     * @throws IOException
+     */
+    @Test
+    public void testEmbedTikaInputStream() throws IOException {
+        embedInTempFile(getSourceTikaInputStream(), getIsMetadataExpectedInOutput());
+        checkSourceFileExists();
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-app/src/test/java/org/apache/tika/mime/MimeTypeTest.java
----------------------------------------------------------------------
diff --git a/tika-app/src/test/java/org/apache/tika/mime/MimeTypeTest.java b/tika-app/src/test/java/org/apache/tika/mime/MimeTypeTest.java
new file mode 100644
index 0000000..447042b
--- /dev/null
+++ b/tika-app/src/test/java/org/apache/tika/mime/MimeTypeTest.java
@@ -0,0 +1,108 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.mime;
+
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+
+import org.apache.tika.mime.MimeType;
+import org.apache.tika.mime.MimeTypeException;
+import org.apache.tika.mime.MimeTypes;
+import org.junit.Before;
+import org.junit.Test;
+
+public class MimeTypeTest {
+
+    private MimeTypes types;
+    private MimeType text;
+
+    @Before
+    public void setUp() throws MimeTypeException {
+        types = new MimeTypes();
+        text = types.forName("text/plain");
+    }
+
+    /** Test MimeType constructor */
+    @Test
+    public void testConstrctor() {
+        // Missing name
+        try {
+            new MimeType(null);
+            fail("Expected IllegalArgumentException");
+        } catch (IllegalArgumentException e) {
+            // expected result
+        }
+    }
+
+    @Test
+    public void testIsValidName() {
+        assertTrue(MimeType.isValid("application/octet-stream"));
+        assertTrue(MimeType.isValid("text/plain"));
+        assertTrue(MimeType.isValid("foo/bar"));
+        assertTrue(MimeType.isValid("a/b"));
+
+        assertFalse(MimeType.isValid("application"));
+        assertFalse(MimeType.isValid("application/"));
+        assertFalse(MimeType.isValid("/"));
+        assertFalse(MimeType.isValid("/octet-stream"));
+        assertFalse(MimeType.isValid("application//octet-stream"));
+        assertFalse(MimeType.isValid("application/octet=stream"));
+        assertFalse(MimeType.isValid("application/\u00f6ctet-stream"));
+        assertFalse(MimeType.isValid("text/plain;"));
+        assertFalse(MimeType.isValid("text/plain; charset=UTF-8"));
+        try {
+            MimeType.isValid(null);
+            fail("Expected IllegalArgumentException");
+        } catch (IllegalArgumentException e) {
+            // expected result
+        }
+    }
+
+    /** Test MimeType setDescription() */
+    @Test
+    public void testSetEmptyValues() {
+        try {
+            text.setDescription(null);
+            fail("Expected IllegalArgumentException");
+        } catch (IllegalArgumentException e) {
+            // expected result
+        }
+        
+        try {
+            text.setAcronym(null);
+            fail("Expected IllegalArgumentException");
+        } catch (IllegalArgumentException e) {
+            // expected result
+        }
+        
+        try {
+            text.addLink(null);
+            fail("Expected IllegalArgumentException");
+        } catch (IllegalArgumentException e) {
+            // expected result
+        }
+
+        try {
+            text.setUniformTypeIdentifier(null);
+            fail("Expected IllegalArgumentException");
+        } catch (IllegalArgumentException e) {
+            // expected result
+        }
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-app/src/test/java/org/apache/tika/mime/MimeTypesTest.java
----------------------------------------------------------------------
diff --git a/tika-app/src/test/java/org/apache/tika/mime/MimeTypesTest.java b/tika-app/src/test/java/org/apache/tika/mime/MimeTypesTest.java
new file mode 100644
index 0000000..be8a575
--- /dev/null
+++ b/tika-app/src/test/java/org/apache/tika/mime/MimeTypesTest.java
@@ -0,0 +1,122 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.mime;
+
+import static org.apache.tika.mime.MediaType.OCTET_STREAM;
+import static org.apache.tika.mime.MediaType.TEXT_PLAIN;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+
+import org.junit.Before;
+import org.junit.Test;
+
+public class MimeTypesTest {
+
+    private MimeTypes types;
+
+    private MediaTypeRegistry registry;
+
+    private MimeType binary;
+
+    private MimeType text;
+
+    private MimeType html;
+
+    @Before
+    public void setUp() throws MimeTypeException {
+        types = new MimeTypes();
+        registry = types.getMediaTypeRegistry();
+        binary = types.forName("application/octet-stream");
+        text = types.forName("text/plain");
+        types.addAlias(text, MediaType.parse("text/x-plain"));
+        html = types.forName("text/html");
+        types.setSuperType(html, TEXT_PLAIN);
+    }
+
+    @Test
+    public void testForName() throws MimeTypeException {
+        assertEquals(text, types.forName("text/plain"));
+        assertEquals(text, types.forName("TEXT/PLAIN"));
+
+        try {
+            types.forName("invalid");
+            fail("MimeTypeException not thrown on invalid type name");
+        } catch (MimeTypeException e) {
+            // expected
+        }
+    }
+
+    @Test
+    public void testRegisteredMimes() throws MimeTypeException {
+        String dummy = "text/xxxxx";
+        assertEquals(text, types.getRegisteredMimeType("text/plain"));
+        assertNull(types.getRegisteredMimeType(dummy));
+        assertNotNull(types.forName(dummy));
+        assertEquals(dummy, types.forName("text/xxxxx").getType().toString());
+        assertEquals(dummy, types.getRegisteredMimeType("text/xxxxx").getType().toString());
+        
+        try {
+            types.forName("invalid");
+            fail("MimeTypeException not thrown on invalid type name");
+        } catch (MimeTypeException e) {
+            // expected
+        }
+    }
+
+    @Test
+    public void testSuperType() throws MimeTypeException {
+        assertNull(registry.getSupertype(OCTET_STREAM));
+        assertEquals(OCTET_STREAM, registry.getSupertype(TEXT_PLAIN));
+        assertEquals(TEXT_PLAIN, registry.getSupertype(html.getType()));
+   }
+
+    @Test
+    public void testIsDescendantOf() {
+        assertFalse(registry.isSpecializationOf(OCTET_STREAM, OCTET_STREAM));
+        assertFalse(registry.isSpecializationOf(TEXT_PLAIN, TEXT_PLAIN));
+        assertFalse(registry.isSpecializationOf(html.getType(), html.getType()));
+
+        assertTrue(registry.isSpecializationOf(html.getType(), OCTET_STREAM));
+        assertFalse(registry.isSpecializationOf(OCTET_STREAM, html.getType()));
+
+        assertTrue(registry.isSpecializationOf(html.getType(), TEXT_PLAIN));
+        assertFalse(registry.isSpecializationOf(TEXT_PLAIN, html.getType()));
+
+        assertTrue(registry.isSpecializationOf(TEXT_PLAIN, OCTET_STREAM));
+        assertFalse(registry.isSpecializationOf(OCTET_STREAM, TEXT_PLAIN));
+    }
+
+    @Test
+    public void testCompareTo() {
+        assertTrue(binary.compareTo(binary) == 0);
+        assertTrue(binary.compareTo(text) != 0);
+        assertTrue(binary.compareTo(html) != 0);
+
+        assertTrue(text.compareTo(binary) != 0);
+        assertTrue(text.compareTo(text) == 0);
+        assertTrue(text.compareTo(html) != 0);
+
+        assertTrue(html.compareTo(binary) != 0);
+        assertTrue(html.compareTo(text) != 0);
+        assertTrue(html.compareTo(html) == 0);
+    }
+
+}