You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2016/03/22 02:19:24 UTC
[12/13] tika git commit: TIKA-1855 -- first pass. Need to turn back
on the forbidden-apis testCheck. More clean up remains.
TIKA-1855 -- first pass. Need to turn back on the forbidden-apis testCheck. More clean up remains.
Project: http://git-wip-us.apache.org/repos/asf/tika/repo
Commit: http://git-wip-us.apache.org/repos/asf/tika/commit/aa5f60d7
Tree: http://git-wip-us.apache.org/repos/asf/tika/tree/aa5f60d7
Diff: http://git-wip-us.apache.org/repos/asf/tika/diff/aa5f60d7
Branch: refs/heads/2.x
Commit: aa5f60d7a0ac0a6a9d739344c76b10940132503f
Parents: 41915dc
Author: tballison <ta...@mitre.org>
Authored: Mon Mar 21 21:18:00 2016 -0400
Committer: tballison <ta...@mitre.org>
Committed: Mon Mar 21 21:18:05 2016 -0400
----------------------------------------------------------------------
pom.xml | 7 +-
tika-app/pom.xml | 15 +
.../batch/builders/AppParserFactoryBuilder.java | 2 +-
.../main/java/org/apache/tika/cli/TikaCLI.java | 2 +-
.../main/java/org/apache/tika/gui/TikaGUI.java | 2 +-
.../tika/config/TikaDetectorConfigTest.java | 143 +++
.../tika/config/TikaParserConfigTest.java | 155 +++
.../tika/config/TikaTranslatorConfigTest.java | 73 ++
.../tika/detect/TestContainerAwareDetector.java | 410 +++++++
.../tika/embedder/ExternalEmbedderTest.java | 285 +++++
.../java/org/apache/tika/mime/MimeTypeTest.java | 108 ++
.../org/apache/tika/mime/MimeTypesTest.java | 122 ++
.../org/apache/tika/mime/TestMimeTypes.java | 1044 +++++++++++++++++
.../tika/parser/AutoDetectParserTest.java | 459 ++++++++
.../apache/tika/parser/DigestingParserTest.java | 139 +++
.../apache/tika/parser/ParsingReaderTest.java | 104 ++
.../tika/parser/RecursiveParserWrapperTest.java | 312 ++++++
.../org/apache/tika/parser/TestParsers.java | 133 +++
.../parser/fork/ForkParserIntegrationTest.java | 268 +++++
.../apache/tika/parser/mock/MockParserTest.java | 251 +++++
.../org/apache/tika/parser/pkg/PackageTest.java | 335 ++++++
.../sax/PhoneExtractingContentHandlerTest.java | 58 +
.../tika/utils/ServiceLoaderUtilsTest.java | 57 +
tika-core/pom.xml | 19 +
.../tika/parser/digesting/CommonsDigester.java | 295 +++++
.../src/test/java/org/apache/tika/TikaTest.java | 74 +-
.../tika/detect/MimeDetectionWithNNTest.java | 8 +-
.../org/apache/tika/mime/MimeDetectionTest.java | 7 +-
.../mime/ProbabilisticMimeDetectionTest.java | 7 +-
.../ProbabilisticMimeDetectionTestWithTika.java | 7 +-
.../java/org/apache/tika/osgi/BundleIT.java | 11 -
.../GLDAS_CLM10SUBP_3H.A19790202.0000.001.grb | Bin 1362900 -> 0 bytes
.../org/apache/tika/mime/brwNIMS_2014.dif | 56 -
.../apache/tika/mime/circles-with-prefix.svg | 8 -
.../resources/org/apache/tika/mime/circles.svg | 8 -
.../org/apache/tika/mime/datamatrix.png | Bin 204 -> 0 bytes
.../tika/mime/gdas1.forecmwf.2014062612.grib2 | Bin 2489194 -> 0 bytes
.../resources/org/apache/tika/mime/htmlfragment | 18 -
.../apache/tika/mime/plotutils-bin-cgm-v3.cgm | Bin 1744 -> 0 bytes
.../org/apache/tika/mime/stylesheet.xsl | 9 -
.../apache/tika/mime/test-difficult-rdf1.xml | 39 -
.../apache/tika/mime/test-difficult-rdf2.xml | 44 -
.../org/apache/tika/mime/test-iso-8859-1.xml | 2 -
.../org/apache/tika/mime/test-long-comment.xml | 21 -
.../tika/mime/test-malformed-header.html.bin | Bin 305 -> 0 bytes
.../org/apache/tika/mime/test-tika-327.html | 50 -
.../org/apache/tika/mime/test-utf16be.xml | Bin 126 -> 0 bytes
.../org/apache/tika/mime/test-utf16le.xml | Bin 126 -> 0 bytes
.../org/apache/tika/mime/test-utf8-bom.xml | 2 -
.../org/apache/tika/mime/test-utf8.xml | 2 -
.../resources/org/apache/tika/mime/test.html | 10 -
.../resources/org/apache/tika/mime/test.xls | Bin 13824 -> 0 bytes
.../org/apache/tika/mime/testlargerbuffer.html | 827 --------------
tika-parent/pom.xml | 3 +-
tika-parser-modules/pom.xml | 26 -
.../tika/parser/ner/NamedEntityParserTest.java | 16 +-
.../parser/ner/regex/RegexNERecogniserTest.java | 15 +-
.../apache/tika/parser/ner/regex/ner-regex.txt | 17 +
.../tika/parser/ner/tika-config-for-ner.xml | 27 +
.../tika/parser/jdbc/SQLite3ParserTest.java | 50 +-
.../tika/parser/chm/TestChmExtraction.java | 25 +-
.../tika/parser/microsoft/ExcelParserTest.java | 387 +++----
.../apache/tika/parser/odf/ODFParserTest.java | 460 ++++----
.../apache/tika/parser/rtf/RTFParserTest.java | 163 +--
.../apache/tika/parser/pdf/PDFParserTest.java | 133 +--
.../tika/parser/isatab/ISArchiveParser.java | 3 +-
.../apache/tika/parser/netcdf/NetCDFParser.java | 17 +-
.../apache/tika/parser/dif/DIFParserTest.java | 31 +-
.../tika/parser/envi/EnviHeaderParserTest.java | 36 +-
.../apache/tika/parser/gdal/TestGDALParser.java | 34 +-
.../tika/parser/geo/topic/GeoParserTest.java | 23 +-
.../GeographicInformationParserTest.java | 50 +-
.../apache/tika/parser/grib/GribParserTest.java | 30 +-
.../apache/tika/parser/hdf/HDFParserTest.java | 44 +-
.../tika/parser/isatab/ISArchiveParserTest.java | 80 +-
.../apache/tika/parser/mat/MatParserTest.java | 60 +-
.../tika/parser/netcdf/NetCDFParserTest.java | 48 +-
.../tika/parser/strings/StringsParserTest.java | 23 +-
.../tika/parser/txt/CharsetDetectorTest.java | 7 +-
.../apache/tika/parser/txt/TXTParserTest.java | 51 +-
.../apache/tika/parser/xml/DcXMLParserTest.java | 28 +-
.../EmptyAndDuplicateElementsXMLParserTest.java | 60 +-
.../tika/parser/xml/FictionBookParserTest.java | 19 +-
tika-parsers/pom.xml | 333 ------
.../main/appended-resources/META-INF/LICENSE | 94 --
.../apache/tika/parser/internal/Activator.java | 54 -
.../tika/parser/utils/CommonsDigester.java | 299 -----
.../test/java/org/apache/tika/TestParsers.java | 109 --
.../tika/config/TikaDetectorConfigTest.java | 143 ---
.../tika/config/TikaParserConfigTest.java | 157 ---
.../tika/config/TikaTranslatorConfigTest.java | 72 --
.../tika/detect/TestContainerAwareDetector.java | 410 -------
.../tika/embedder/ExternalEmbedderTest.java | 292 -----
.../java/org/apache/tika/mime/MimeTypeTest.java | 105 --
.../org/apache/tika/mime/MimeTypesTest.java | 122 --
.../org/apache/tika/mime/TestMimeTypes.java | 1047 ------------------
.../tika/parser/AutoDetectParserTest.java | 459 --------
.../apache/tika/parser/DigestingParserTest.java | 136 ---
.../apache/tika/parser/ParsingReaderTest.java | 104 --
.../tika/parser/RecursiveParserWrapperTest.java | 312 ------
.../parser/fork/ForkParserIntegrationTest.java | 268 -----
.../apache/tika/parser/mock/MockParserTest.java | 251 -----
.../org/apache/tika/parser/pkg/PackageTest.java | 335 ------
.../sax/PhoneExtractingContentHandlerTest.java | 58 -
.../tika/utils/ServiceLoaderUtilsTest.java | 57 -
tika-server/pom.xml | 8 +-
.../org/apache/tika/server/TikaServerCli.java | 2 +-
.../org/apache/tika/server/CXFTestBase.java | 14 +-
.../tika/server/DetectorResourceTest.java | 6 +-
.../tika/server/LanguageResourceTest.java | 4 +-
.../tika/server/MetadataResourceTest.java | 26 +-
.../server/RecursiveMetadataResourceTest.java | 36 +-
.../apache/tika/server/StackTraceOffTest.java | 8 +-
.../org/apache/tika/server/StackTraceTest.java | 8 +-
.../org/apache/tika/server/TikaParsersTest.java | 12 +-
.../apache/tika/server/TikaResourceTest.java | 23 +-
.../tika/server/UnpackerResourceTest.java | 20 +-
tika-server/src/test/resources/2exe.docx | Bin 715333 -> 0 bytes
tika-server/src/test/resources/2pic.doc | Bin 4339712 -> 0 bytes
tika-server/src/test/resources/2pic.docx | Bin 883427 -> 0 bytes
.../src/test/resources/CDEC_WEATHER_2010_03_02 | 98 --
tika-server/src/test/resources/Doc1_ole.doc | Bin 89600 -> 0 bytes
tika-server/src/test/resources/english.txt | 1 -
tika-server/src/test/resources/foo.csv | 4 -
tika-server/src/test/resources/french.txt | 1 -
.../test/resources/mime/custom-mimetypes.xml | 24 -
.../src/test/resources/mock/null_pointer.xml | 25 -
.../org/apache/tika/mime/custom-mimetypes.xml | 24 +
tika-server/src/test/resources/password.xls | Bin 22528 -> 0 bytes
tika-server/src/test/resources/pic.xls | Bin 593920 -> 0 bytes
tika-server/src/test/resources/pic.xlsx | Bin 580188 -> 0 bytes
tika-server/src/test/resources/test.doc | Bin 9216 -> 0 bytes
.../testRTF_npeFromWMFInTikaServer.rtf | 235 ----
.../test/resources/test_recursive_embedded.docx | Bin 27082 -> 0 bytes
tika-test-resources/pom.xml | 7 -
.../apache/tika/parser/ner/regex/ner-regex.txt | 17 -
.../org/apache/tika/parser/ner/tika-config.xml | 27 -
.../src/test/resources/test-documents/2exe.docx | Bin 0 -> 715333 bytes
.../src/test/resources/test-documents/2pic.doc | Bin 0 -> 4339712 bytes
.../src/test/resources/test-documents/2pic.docx | Bin 0 -> 883427 bytes
.../test-documents/CDEC_WEATHER_2010_03_02 | 98 ++
.../resources/test-documents/brwNIMS_2014.dif | 56 +
.../test-documents/circles-with-prefix.svg | 8 +
.../test/resources/test-documents/circles.svg | 8 +
.../resources/test-documents/datamatrix.png | Bin 0 -> 204 bytes
.../test/resources/test-documents/english.txt | 1 +
.../src/test/resources/test-documents/foo.csv | 4 +
.../test/resources/test-documents/french.txt | 1 +
.../test/resources/test-documents/htmlfragment | 18 +
.../test-documents/mock/null_pointer.xml | 4 +-
.../test/resources/test-documents/password.xls | Bin 0 -> 22528 bytes
.../src/test/resources/test-documents/pic.xls | Bin 0 -> 593920 bytes
.../src/test/resources/test-documents/pic.xlsx | Bin 0 -> 580188 bytes
.../test-documents/plotutils-bin-cgm-v3.cgm | Bin 0 -> 1744 bytes
.../resources/test-documents/stylesheet.xsl | 9 +
.../test-documents/test-difficult-rdf1.xml | 39 +
.../test-documents/test-difficult-rdf2.xml | 44 +
.../test-documents/test-iso-8859-1.xml | 2 +
.../test-documents/test-long-comment.xml | 21 +
.../resources/test-documents/test-tika-327.html | 50 +
.../resources/test-documents/test-utf16be.xml | Bin 0 -> 126 bytes
.../resources/test-documents/test-utf16le.xml | Bin 0 -> 126 bytes
.../resources/test-documents/test-utf8-bom.xml | 2 +
.../test/resources/test-documents/test-utf8.xml | 2 +
.../src/test/resources/test-documents/test.html | 10 +
.../src/test/resources/test-documents/test.xls | Bin 0 -> 13824 bytes
.../testRTF_npeFromWMFInTikaServer.rtf | 235 ++++
.../test-documents/testlargerbuffer.html | 827 ++++++++++++++
168 files changed, 7231 insertions(+), 8029 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index c790244..ea4f114 100644
--- a/pom.xml
+++ b/pom.xml
@@ -46,9 +46,10 @@
<modules>
<module>tika-parent</module>
- <module>tika-core</module>
<module>tika-test-resources</module>
- <module>tika-parsers</module>
+ <module>tika-core</module>
+ <module>tika-parser-modules</module>
+ <module>tika-parser-bundles</module>
<module>tika-xmp</module>
<module>tika-serialization</module>
<module>tika-batch</module>
@@ -59,8 +60,6 @@
<module>tika-langdetect</module>
<module>tika-example</module>
<module>tika-java7</module>
- <module>tika-parser-modules</module>
- <module>tika-parser-bundles</module>
</modules>
<profiles>
http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-app/pom.xml
----------------------------------------------------------------------
diff --git a/tika-app/pom.xml b/tika-app/pom.xml
index e362391..9177afb 100644
--- a/tika-app/pom.xml
+++ b/tika-app/pom.xml
@@ -101,6 +101,21 @@
<groupId>commons-io</groupId>
<version>${commons.io.version}</version>
</dependency>
+ <!-- test dependencies -->
+ <dependency>
+ <groupId>${project.groupId}</groupId>
+ <artifactId>tika-test-resources</artifactId>
+ <version>${project.version}</version>
+ <type>test-jar</type>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>${project.groupId}</groupId>
+ <artifactId>tika-core</artifactId>
+ <version>${project.version}</version>
+ <type>test-jar</type>
+ <scope>test</scope>
+ </dependency>
</dependencies>
<build>
http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-app/src/main/java/org/apache/tika/batch/builders/AppParserFactoryBuilder.java
----------------------------------------------------------------------
diff --git a/tika-app/src/main/java/org/apache/tika/batch/builders/AppParserFactoryBuilder.java b/tika-app/src/main/java/org/apache/tika/batch/builders/AppParserFactoryBuilder.java
index 998f649..98f4343 100644
--- a/tika-app/src/main/java/org/apache/tika/batch/builders/AppParserFactoryBuilder.java
+++ b/tika-app/src/main/java/org/apache/tika/batch/builders/AppParserFactoryBuilder.java
@@ -23,7 +23,7 @@ import java.util.Map;
import org.apache.tika.batch.DigestingAutoDetectParserFactory;
import org.apache.tika.batch.ParserFactory;
import org.apache.tika.parser.DigestingParser;
-import org.apache.tika.parser.utils.CommonsDigester;
+import org.apache.tika.parser.digesting.CommonsDigester;
import org.apache.tika.util.ClassLoaderUtil;
import org.apache.tika.util.XMLDOMUtil;
import org.w3c.dom.Node;
http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java
----------------------------------------------------------------------
diff --git a/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java b/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java
index 314599e..a2b91c9 100644
--- a/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java
+++ b/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java
@@ -101,7 +101,7 @@ import org.apache.tika.parser.ParserDecorator;
import org.apache.tika.parser.PasswordProvider;
import org.apache.tika.parser.RecursiveParserWrapper;
import org.apache.tika.parser.html.BoilerpipeContentHandler;
-import org.apache.tika.parser.utils.CommonsDigester;
+import org.apache.tika.parser.digesting.CommonsDigester;
import org.apache.tika.sax.BasicContentHandlerFactory;
import org.apache.tika.sax.BodyContentHandler;
import org.apache.tika.sax.ContentHandlerFactory;
http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-app/src/main/java/org/apache/tika/gui/TikaGUI.java
----------------------------------------------------------------------
diff --git a/tika-app/src/main/java/org/apache/tika/gui/TikaGUI.java b/tika-app/src/main/java/org/apache/tika/gui/TikaGUI.java
index 5ecc763..1bc9405 100644
--- a/tika-app/src/main/java/org/apache/tika/gui/TikaGUI.java
+++ b/tika-app/src/main/java/org/apache/tika/gui/TikaGUI.java
@@ -76,7 +76,7 @@ import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
import org.apache.tika.parser.RecursiveParserWrapper;
import org.apache.tika.parser.html.BoilerpipeContentHandler;
-import org.apache.tika.parser.utils.CommonsDigester;
+import org.apache.tika.parser.digesting.CommonsDigester;
import org.apache.tika.sax.BasicContentHandlerFactory;
import org.apache.tika.sax.BodyContentHandler;
import org.apache.tika.sax.ContentHandlerDecorator;
http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-app/src/test/java/org/apache/tika/config/TikaDetectorConfigTest.java
----------------------------------------------------------------------
diff --git a/tika-app/src/test/java/org/apache/tika/config/TikaDetectorConfigTest.java b/tika-app/src/test/java/org/apache/tika/config/TikaDetectorConfigTest.java
new file mode 100644
index 0000000..132475a
--- /dev/null
+++ b/tika-app/src/test/java/org/apache/tika/config/TikaDetectorConfigTest.java
@@ -0,0 +1,143 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.config;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+
+import org.apache.tika.detect.CompositeDetector;
+import org.apache.tika.detect.DefaultDetector;
+import org.apache.tika.detect.Detector;
+import org.apache.tika.detect.EmptyDetector;
+import org.apache.tika.io.TikaInputStream;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.parser.mbox.OutlookPSTParser;
+import org.apache.tika.parser.microsoft.POIFSContainerDetector;
+import org.apache.tika.parser.pkg.ZipContainerDetector;
+import org.junit.Test;
+
+/**
+ * JUnit test class for {@link TikaConfig}, which covers things
+ * that {@link TikaConfigTest} can't do due to a need for the
+ * full set of detectors
+ */
+public class TikaDetectorConfigTest extends AbstractTikaConfigTest {
+ @Test
+ public void testDetectorExcludeFromDefault() throws Exception {
+ TikaConfig config = getConfig("TIKA-1702-detector-blacklist.xml");
+ assertNotNull(config.getParser());
+ assertNotNull(config.getDetector());
+ CompositeDetector detector = (CompositeDetector)config.getDetector();
+
+ // Should be wrapping two detectors
+ assertEquals(2, detector.getDetectors().size());
+
+
+ // First should be DefaultDetector, second EmptyDetector, in that order
+ assertEquals(DefaultDetector.class, detector.getDetectors().get(0).getClass());
+ assertEquals(EmptyDetector.class, detector.getDetectors().get(1).getClass());
+
+
+ // Get the DefaultDetector from the config
+ DefaultDetector confDetector = (DefaultDetector)detector.getDetectors().get(0);
+
+ // Get a fresh "default" DefaultDetector
+ DefaultDetector normDetector = new DefaultDetector(config.getMimeRepository());
+
+
+ // The default one will offer the Zip and POIFS detectors
+ assertDetectors(normDetector, true, true);
+
+
+ // The one from the config won't, as we excluded those
+ assertDetectors(confDetector, false, false);
+ }
+
+ /**
+ * TIKA-1708 - If the Zip detector is disabled, either explicitly,
+ * or via giving a list of detectors that it isn't part of, ensure
+ * that detection of PST files still works
+ */
+ @Test
+ public void testPSTDetectionWithoutZipDetector() throws Exception {
+ // Check the one with an exclude
+ TikaConfig configWX = getConfig("TIKA-1708-detector-default.xml");
+ assertNotNull(configWX.getParser());
+ assertNotNull(configWX.getDetector());
+ CompositeDetector detectorWX = (CompositeDetector)configWX.getDetector();
+
+ // Check it has the POIFS one, but not the zip one
+ assertDetectors(detectorWX, true, false);
+
+
+ // Check the one with an explicit list
+ TikaConfig configCL = getConfig("TIKA-1708-detector-composite.xml");
+ assertNotNull(configCL.getParser());
+ assertNotNull(configCL.getDetector());
+ CompositeDetector detectorCL = (CompositeDetector)configCL.getDetector();
+ assertEquals(2, detectorCL.getDetectors().size());
+
+ // Check it also has the POIFS one, but not the zip one
+ assertDetectors(detectorCL, true, false);
+
+
+ // Check that both configs have a media type registry with entries
+ assertTrue("Not enough mime types: " + configWX.getMediaTypeRegistry().getTypes().size(),
+ configWX.getMediaTypeRegistry().getTypes().size() > 100);
+ assertTrue("Not enough mime types: " + configCL.getMediaTypeRegistry().getTypes().size(),
+ configCL.getMediaTypeRegistry().getTypes().size() > 100);
+
+
+ // Now check they detect PST files correctly
+ TikaInputStream stream = TikaInputStream.cast(
+ getTestDocumentAsStream("testPST.pst"));
+ assertEquals(
+ OutlookPSTParser.MS_OUTLOOK_PST_MIMETYPE,
+ detectorWX.detect(stream, new Metadata())
+ );
+ assertEquals(
+ OutlookPSTParser.MS_OUTLOOK_PST_MIMETYPE,
+ detectorCL.detect(stream, new Metadata())
+ );
+ }
+
+ private void assertDetectors(CompositeDetector detector, boolean shouldHavePOIFS,
+ boolean shouldHaveZip) {
+ boolean hasZip = false;
+ boolean hasPOIFS = false;
+ for (Detector d : detector.getDetectors()) {
+ if (d instanceof ZipContainerDetector) {
+ if (shouldHaveZip) {
+ hasZip = true;
+ } else {
+ fail("Shouldn't have the ZipContainerDetector from config");
+ }
+ }
+ if (d instanceof POIFSContainerDetector) {
+ if (shouldHavePOIFS) {
+ hasPOIFS = true;
+ } else {
+ fail("Shouldn't have the POIFSContainerDetector from config");
+ }
+ }
+ }
+ if (shouldHavePOIFS) assertTrue("Should have the POIFSContainerDetector", hasPOIFS);
+ if (shouldHaveZip) assertTrue("Should have the ZipContainerDetector", hasZip);
+ }
+}
http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-app/src/test/java/org/apache/tika/config/TikaParserConfigTest.java
----------------------------------------------------------------------
diff --git a/tika-app/src/test/java/org/apache/tika/config/TikaParserConfigTest.java b/tika-app/src/test/java/org/apache/tika/config/TikaParserConfigTest.java
new file mode 100644
index 0000000..817beb4
--- /dev/null
+++ b/tika-app/src/test/java/org/apache/tika/config/TikaParserConfigTest.java
@@ -0,0 +1,155 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.config;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+
+import java.util.List;
+
+import org.apache.tika.mime.MediaType;
+import org.apache.tika.parser.CompositeParser;
+import org.apache.tika.parser.DefaultParser;
+import org.apache.tika.parser.EmptyParser;
+import org.apache.tika.parser.Parser;
+import org.apache.tika.parser.ParserDecorator;
+import org.apache.tika.parser.executable.ExecutableParser;
+import org.apache.tika.parser.xml.XMLParser;
+import org.junit.Test;
+
+/**
+ * JUnit test class for {@link TikaConfig}, which covers things
+ * that {@link TikaConfigTest} can't do due to a need for the
+ * full set of parsers
+ */
+public class TikaParserConfigTest extends AbstractTikaConfigTest {
+ @Test
+ public void testMimeExcludeInclude() throws Exception {
+ TikaConfig config = getConfig("TIKA-1558-blacklist.xml");
+ assertNotNull(config.getParser());
+ assertNotNull(config.getDetector());
+ Parser parser = config.getParser();
+
+ MediaType PDF = MediaType.application("pdf");
+ MediaType JPEG = MediaType.image("jpeg");
+
+
+ // Has two parsers
+ assertEquals(CompositeParser.class, parser.getClass());
+ CompositeParser cParser = (CompositeParser)parser;
+ assertEquals(2, cParser.getAllComponentParsers().size());
+
+ // Both are decorated
+ assertTrue(cParser.getAllComponentParsers().get(0) instanceof ParserDecorator);
+ assertTrue(cParser.getAllComponentParsers().get(1) instanceof ParserDecorator);
+ ParserDecorator p0 = (ParserDecorator)cParser.getAllComponentParsers().get(0);
+ ParserDecorator p1 = (ParserDecorator)cParser.getAllComponentParsers().get(1);
+
+
+ // DefaultParser will be wrapped with excludes
+ assertEquals(DefaultParser.class, p0.getWrappedParser().getClass());
+
+ assertNotContained(PDF, p0.getSupportedTypes(context));
+ assertContains(PDF, p0.getWrappedParser().getSupportedTypes(context));
+ assertNotContained(JPEG, p0.getSupportedTypes(context));
+ assertContains(JPEG, p0.getWrappedParser().getSupportedTypes(context));
+
+
+ // Will have an empty parser for PDF
+ assertEquals(EmptyParser.class, p1.getWrappedParser().getClass());
+ assertEquals(1, p1.getSupportedTypes(context).size());
+ assertContains(PDF, p1.getSupportedTypes(context));
+ assertNotContained(PDF, p1.getWrappedParser().getSupportedTypes(context));
+ }
+
+ @Test
+ public void testParserExcludeFromDefault() throws Exception {
+ TikaConfig config = getConfig("TIKA-1558-blacklist.xml");
+ assertNotNull(config.getParser());
+ assertNotNull(config.getDetector());
+ CompositeParser parser = (CompositeParser)config.getParser();
+
+ MediaType PE_EXE = MediaType.application("x-msdownload");
+ MediaType ELF = MediaType.application("x-elf");
+
+
+ // Get the DefaultParser from the config
+ ParserDecorator confWrappedParser = (ParserDecorator)parser.getParsers().get(MediaType.APPLICATION_XML);
+ assertNotNull(confWrappedParser);
+ DefaultParser confParser = (DefaultParser)confWrappedParser.getWrappedParser();
+
+ // Get a fresh "default" DefaultParser
+ DefaultParser normParser = new DefaultParser(config.getMediaTypeRegistry());
+
+
+ // The default one will offer the Executable Parser
+ assertContains(PE_EXE, normParser.getSupportedTypes(context));
+ assertContains(ELF, normParser.getSupportedTypes(context));
+
+ boolean hasExec = false;
+ for (Parser p : normParser.getParsers().values()) {
+ if (p instanceof ExecutableParser) {
+ hasExec = true;
+ break;
+ }
+ }
+ assertTrue(hasExec);
+
+
+ // The one from the config won't
+ assertNotContained(PE_EXE, confParser.getSupportedTypes(context));
+ assertNotContained(ELF, confParser.getSupportedTypes(context));
+
+ for (Parser p : confParser.getParsers().values()) {
+ if (p instanceof ExecutableParser)
+ fail("Shouldn't have the Executable Parser from config");
+ }
+ }
+ /**
+ * TIKA-1558 It should be possible to exclude Parsers from being picked up by
+ * DefaultParser.
+ */
+ @Test
+ public void defaultParserBlacklist() throws Exception {
+ TikaConfig config = new TikaConfig();
+ assertNotNull(config.getParser());
+ assertNotNull(config.getDetector());
+ CompositeParser cp = (CompositeParser) config.getParser();
+ List<Parser> parsers = cp.getAllComponentParsers();
+
+ boolean hasXML = false;
+ for (Parser p : parsers) {
+ if (p instanceof XMLParser) {
+ hasXML = true;
+ break;
+ }
+ }
+ assertTrue("Default config should include an XMLParser.", hasXML);
+
+ // This custom TikaConfig should exclude XMLParser and all of its subclasses.
+ config = getConfig("TIKA-1558-blacklistsub.xml");
+ cp = (CompositeParser) config.getParser();
+ parsers = cp.getAllComponentParsers();
+
+ for (Parser p : parsers) {
+ if (p instanceof XMLParser)
+ fail("Custom config should not include an XMLParser (" + p.getClass() + ").");
+ }
+ }
+}
http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-app/src/test/java/org/apache/tika/config/TikaTranslatorConfigTest.java
----------------------------------------------------------------------
diff --git a/tika-app/src/test/java/org/apache/tika/config/TikaTranslatorConfigTest.java b/tika-app/src/test/java/org/apache/tika/config/TikaTranslatorConfigTest.java
new file mode 100644
index 0000000..764bbe4
--- /dev/null
+++ b/tika-app/src/test/java/org/apache/tika/config/TikaTranslatorConfigTest.java
@@ -0,0 +1,73 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.config;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+
+import org.apache.tika.config.TikaConfig;
+import org.apache.tika.language.translate.DefaultTranslator;
+import org.apache.tika.language.translate.EmptyTranslator;
+import org.junit.Test;
+
+/**
+ * JUnit test class for {@link TikaConfig}, which covers things
+ * that {@link TikaConfigTest} can't do due to a need for the
+ * full set of translators
+ */
+public class TikaTranslatorConfigTest extends AbstractTikaConfigTest {
+ @Test
+ public void testDefaultBehaviour() throws Exception {
+ TikaConfig config = TikaConfig.getDefaultConfig();
+ assertNotNull(config.getTranslator());
+ assertEquals(DefaultTranslator.class, config.getTranslator().getClass());
+ }
+
+ @Test
+ public void testRequestsDefault() throws Exception {
+ TikaConfig config = getConfig("TIKA-1702-translator-default.xml");
+ assertNotNull(config.getParser());
+ assertNotNull(config.getDetector());
+ assertNotNull(config.getTranslator());
+
+ assertEquals(DefaultTranslator.class, config.getTranslator().getClass());
+ }
+
+ @Test
+ public void testRequestsEmpty() throws Exception {
+ TikaConfig config = getConfig("TIKA-1702-translator-empty.xml");
+ assertNotNull(config.getParser());
+ assertNotNull(config.getDetector());
+ assertNotNull(config.getTranslator());
+
+ assertEquals(EmptyTranslator.class, config.getTranslator().getClass());
+ }
+
+ /**
+ * Currently, Translators don't support Composites, so
+ * if multiple translators are given, only the first wins
+ */
+ @Test
+ public void testRequestsMultiple() throws Exception {
+ TikaConfig config = getConfig("TIKA-1702-translator-empty-default.xml");
+ assertNotNull(config.getParser());
+ assertNotNull(config.getDetector());
+ assertNotNull(config.getTranslator());
+
+ assertEquals(EmptyTranslator.class, config.getTranslator().getClass());
+ }
+}
http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-app/src/test/java/org/apache/tika/detect/TestContainerAwareDetector.java
----------------------------------------------------------------------
diff --git a/tika-app/src/test/java/org/apache/tika/detect/TestContainerAwareDetector.java b/tika-app/src/test/java/org/apache/tika/detect/TestContainerAwareDetector.java
new file mode 100644
index 0000000..5787408
--- /dev/null
+++ b/tika-app/src/test/java/org/apache/tika/detect/TestContainerAwareDetector.java
@@ -0,0 +1,410 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.detect;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertTrue;
+
+import java.io.File;
+import java.io.FilenameFilter;
+import java.io.IOException;
+import java.io.InputStream;
+
+import org.apache.poi.poifs.filesystem.NPOIFSFileSystem;
+import org.apache.tika.config.TikaConfig;
+import org.apache.tika.io.TikaInputStream;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.mime.MediaType;
+import org.apache.tika.mime.MimeTypes;
+import org.junit.Test;
+
+/**
+ * JUnit test class for {@link ContainerAwareDetector}
+ */
+public class TestContainerAwareDetector {
+ private final TikaConfig tikaConfig = TikaConfig.getDefaultConfig();
+ private final MimeTypes mimeTypes = tikaConfig.getMimeRepository();
+ private final Detector detector = new DefaultDetector(mimeTypes);
+
+ /** Asserts the detected type when only the file's data is available (no resource name is set). */
+ private void assertTypeByData(String file, String type) throws Exception {
+ assertTypeByNameAndData(file, null, type);
+ }
+ /** Asserts the detected type when the file's own name is supplied as the resource name. */
+ private void assertTypeByNameAndData(String file, String type) throws Exception {
+ assertTypeByNameAndData(file, file, type);
+ }
+ /** Runs both the data-only check and the name-plus-data check for one file. */
+ private void assertType(String file, String byData, String byNameAndData) throws Exception {
+ assertTypeByData(file, byData);
+ assertTypeByNameAndData(file, byNameAndData);
+ }
+ /** Asserts the detector's result for a data file under a given (possibly wrong) name; the mime magic check is skipped. */
+ private void assertTypeByNameAndData(String dataFile, String name, String type) throws Exception {
+ assertTypeByNameAndData(dataFile, name, type, null);
+ }
+ /**
+ * Opens a file from /test-documents/ and asserts what it is detected as.
+ *
+ * @param dataFile name of the test document to load from the classpath
+ * @param name resource name to put in the metadata, or null for none
+ * @param typeFromDetector type expected from the full detector
+ * @param typeFromMagic type expected from the mime repository alone,
+ * or null to skip that check
+ */
+ private void assertTypeByNameAndData(String dataFile, String name, String typeFromDetector, String typeFromMagic) throws Exception {
+ // Report a missing test document by name instead of letting it
+ // surface as an opaque NullPointerException further down
+ assertTrue("Test document not found: " + dataFile,
+ TestContainerAwareDetector.class.getResource("/test-documents/" + dataFile) != null);
+
+ try (TikaInputStream stream = TikaInputStream.get(
+ TestContainerAwareDetector.class.getResource("/test-documents/" + dataFile))) {
+ Metadata m = new Metadata();
+ if (name != null) {
+ m.add(Metadata.RESOURCE_NAME_KEY, name);
+ }
+
+ // Mime Magic version is likely to be less precise
+ if (typeFromMagic != null) {
+ assertEquals(
+ MediaType.parse(typeFromMagic),
+ mimeTypes.detect(stream, m));
+ }
+
+ // All being well, the detector should get it perfect
+ assertEquals(
+ MediaType.parse(typeFromDetector),
+ detector.detect(stream, m));
+ }
+ }
+
+ /**
+ * Checks detection of OLE2 based formats: first from the data alone,
+ * then with the correct filename, then with misleading filenames.
+ */
+ @Test
+ public void testDetectOLE2() throws Exception {
+ // Microsoft office types known by POI
+ assertTypeByData("testEXCEL.xls", "application/vnd.ms-excel");
+ assertTypeByData("testWORD.doc", "application/msword");
+ assertTypeByData("testPPT.ppt", "application/vnd.ms-powerpoint");
+
+ assertTypeByData("test-outlook.msg", "application/vnd.ms-outlook");
+ assertTypeByData("test-outlook2003.msg", "application/vnd.ms-outlook");
+ assertTypeByData("testVISIO.vsd", "application/vnd.visio");
+ assertTypeByData("testPUBLISHER.pub", "application/x-mspublisher");
+ assertTypeByData("testWORKS.wps", "application/vnd.ms-works");
+ assertTypeByData("testWORKS2000.wps", "application/vnd.ms-works");
+
+ // older Works Word Processor files can't be recognized
+ // they were created with Works Word Processor 7.0 (hence the text inside)
+ // and exported to the older formats with the "Save As" feature
+ // NOTE(review): the assertions below expect application/vnd.ms-works,
+ // which seems to contradict "can't be recognized" - confirm and reword
+ assertTypeByData("testWORKSWordProcessor3.0.wps","application/vnd.ms-works");
+ assertTypeByData("testWORKSWordProcessor4.0.wps","application/vnd.ms-works");
+ assertTypeByData("testWORKSSpreadsheet7.0.xlr", "application/x-tika-msworks-spreadsheet");
+ assertTypeByData("testPROJECT2003.mpp", "application/vnd.ms-project");
+ assertTypeByData("testPROJECT2007.mpp", "application/vnd.ms-project");
+
+ // Excel95 can be detected but not parsed
+ assertTypeByData("testEXCEL_95.xls", "application/vnd.ms-excel");
+
+ // Try some ones that POI doesn't handle, that are still OLE2 based
+ assertTypeByData("testCOREL.shw", "application/x-corelpresentations");
+ assertTypeByData("testQUATTRO.qpw", "application/x-quattro-pro");
+ assertTypeByData("testQUATTRO.wb3", "application/x-quattro-pro");
+
+ assertTypeByData("testHWP_5.0.hwp", "application/x-hwp-v5");
+
+
+ // With the filename and data
+ assertTypeByNameAndData("testEXCEL.xls", "application/vnd.ms-excel");
+ assertTypeByNameAndData("testWORD.doc", "application/msword");
+ assertTypeByNameAndData("testPPT.ppt", "application/vnd.ms-powerpoint");
+
+ // With the wrong filename supplied, data will trump filename
+ assertTypeByNameAndData("testEXCEL.xls", "notWord.doc", "application/vnd.ms-excel");
+ assertTypeByNameAndData("testWORD.doc", "notExcel.xls", "application/msword");
+ assertTypeByNameAndData("testPPT.ppt", "notWord.doc", "application/vnd.ms-powerpoint");
+
+ // With a filename of a totally different type, data will trump filename
+ assertTypeByNameAndData("testEXCEL.xls", "notPDF.pdf", "application/vnd.ms-excel");
+ assertTypeByNameAndData("testEXCEL.xls", "notPNG.png", "application/vnd.ms-excel");
+ }
+
+ /**
+ * There is no way to distinguish "proper" StarOffice files from templates.
+ * All templates have the same extension but their actual type depends on
+ * the magic. Our current MimeTypes class doesn't allow us to use the same
+ * glob pattern in more than one mimetype.
+ * <p>
+ * Each document and each .vor template is therefore expected to be
+ * detected as the same type both from its data alone and from its
+ * name plus data.
+ *
+ * @throws Exception if a test document cannot be read or detected
+ */
+ @Test
+ public void testDetectStarOfficeFiles() throws Exception {
+ assertType("testStarOffice-5.2-calc.sdc",
+ "application/vnd.stardivision.calc",
+ "application/vnd.stardivision.calc");
+ assertType("testVORCalcTemplate.vor",
+ "application/vnd.stardivision.calc",
+ "application/vnd.stardivision.calc");
+ assertType("testStarOffice-5.2-draw.sda",
+ "application/vnd.stardivision.draw",
+ "application/vnd.stardivision.draw");
+ assertType("testVORDrawTemplate.vor",
+ "application/vnd.stardivision.draw",
+ "application/vnd.stardivision.draw");
+ assertType("testStarOffice-5.2-impress.sdd",
+ "application/vnd.stardivision.impress",
+ "application/vnd.stardivision.impress");
+ assertType("testVORImpressTemplate.vor",
+ "application/vnd.stardivision.impress",
+ "application/vnd.stardivision.impress");
+ assertType("testStarOffice-5.2-writer.sdw",
+ "application/vnd.stardivision.writer",
+ "application/vnd.stardivision.writer");
+ assertType("testVORWriterTemplate.vor",
+ "application/vnd.stardivision.writer",
+ "application/vnd.stardivision.writer");
+
+ }
+
+ @Test
+ public void testOpenContainer() throws Exception {
+ try (TikaInputStream stream = TikaInputStream.get(
+ TestContainerAwareDetector.class.getResource("/test-documents/testPPT.ppt"))) {
+ assertNull(stream.getOpenContainer());
+ assertEquals(
+ MediaType.parse("application/vnd.ms-powerpoint"),
+ detector.detect(stream, new Metadata()));
+ assertTrue(stream.getOpenContainer() instanceof NPOIFSFileSystem);
+ }
+ }
+
+ /**
+ * EPub uses a similar mimetype entry to OpenDocument for storing
+ * the mimetype within the parent zip file.
+ * Both sample files are checked from their data alone.
+ */
+ @Test
+ public void testDetectEPub() throws Exception {
+ assertTypeByData("testEPUB.epub", "application/epub+zip");
+ assertTypeByData("testiBooks.ibooks", "application/x-ibooks+zip");
+ }
+
+ /** Checks that a Lotus Notes .eml export is detected as message/rfc822 from its data alone. */
+ @Test
+ public void testDetectLotusNotesEml() throws Exception {
+ // Lotus .eml files aren't guaranteed to have any of the magic
+ // matches as the first line, but should have X-Notes-Item and Message-ID
+ assertTypeByData("testLotusEml.eml", "message/rfc822");
+ }
+
+ /** Checks that OpenDocument text and formula files are detected from their data alone. */
+ @Test
+ public void testDetectODF() throws Exception {
+ assertTypeByData("testODFwithOOo3.odt", "application/vnd.oasis.opendocument.text");
+ assertTypeByData("testOpenOffice2.odf", "application/vnd.oasis.opendocument.formula");
+ }
+
+ /**
+ * Checks detection of OOXML formats: first from the data alone,
+ * then with the correct filename, then with misleading filenames.
+ */
+ @Test
+ public void testDetectOOXML() throws Exception {
+ assertTypeByData("testEXCEL.xlsx", "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet");
+ assertTypeByData("testWORD.docx", "application/vnd.openxmlformats-officedocument.wordprocessingml.document");
+ assertTypeByData("testPPT.pptx", "application/vnd.openxmlformats-officedocument.presentationml.presentation");
+
+ // Check some of the less common OOXML types
+ assertTypeByData("testPPT.pptm", "application/vnd.ms-powerpoint.presentation.macroenabled.12");
+ assertTypeByData("testPPT.ppsx", "application/vnd.openxmlformats-officedocument.presentationml.slideshow");
+ assertTypeByData("testPPT.ppsm", "application/vnd.ms-powerpoint.slideshow.macroEnabled.12");
+ assertTypeByData("testDOTM.dotm", "application/vnd.ms-word.template.macroEnabled.12");
+ assertTypeByData("testEXCEL.strict.xlsx", "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet");
+ assertTypeByData("testPPT.xps", "application/vnd.ms-xpsdocument");
+
+ assertTypeByData("testVISIO.vsdm", "application/vnd.ms-visio.drawing.macroenabled.12");
+ assertTypeByData("testVISIO.vsdx", "application/vnd.ms-visio.drawing");
+ assertTypeByData("testVISIO.vssm", "application/vnd.ms-visio.stencil.macroenabled.12");
+ assertTypeByData("testVISIO.vssx", "application/vnd.ms-visio.stencil");
+ assertTypeByData("testVISIO.vstm", "application/vnd.ms-visio.template.macroenabled.12");
+ assertTypeByData("testVISIO.vstx", "application/vnd.ms-visio.template");
+
+ // .xlsb is an OOXML file containing the binary parts, and not
+ // an OLE2 file as you might initially expect!
+ assertTypeByData("testEXCEL.xlsb", "application/vnd.ms-excel.sheet.binary.macroEnabled.12");
+
+ // With the filename and data
+ assertTypeByNameAndData("testEXCEL.xlsx", "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet");
+ assertTypeByNameAndData("testWORD.docx", "application/vnd.openxmlformats-officedocument.wordprocessingml.document");
+ assertTypeByNameAndData("testPPT.pptx", "application/vnd.openxmlformats-officedocument.presentationml.presentation");
+
+ // With the wrong filename supplied, data will trump filename
+ assertTypeByNameAndData("testEXCEL.xlsx", "notWord.docx", "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet");
+ assertTypeByNameAndData("testWORD.docx", "notExcel.xlsx", "application/vnd.openxmlformats-officedocument.wordprocessingml.document");
+ assertTypeByNameAndData("testPPT.pptx", "notWord.docx", "application/vnd.openxmlformats-officedocument.presentationml.presentation");
+
+ // With an incorrect filename of a different container type, data trumps filename
+ assertTypeByNameAndData("testEXCEL.xlsx", "notOldExcel.xls", "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet");
+ }
+
+ /**
+ * Password Protected OLE2 files are fairly straightforward to detect, as they
+ * have the same structure as regular OLE2 files. (Core streams may be encrypted
+ * however)
+ * Each file is checked from its data alone and again with its filename.
+ */
+ @Test
+ public void testDetectProtectedOLE2() throws Exception {
+ assertTypeByData("testEXCEL_protected_passtika.xls", "application/vnd.ms-excel");
+ assertTypeByData("testWORD_protected_passtika.doc", "application/msword");
+ assertTypeByData("testPPT_protected_passtika.ppt", "application/vnd.ms-powerpoint");
+ assertTypeByNameAndData("testEXCEL_protected_passtika.xls", "application/vnd.ms-excel");
+ assertTypeByNameAndData("testWORD_protected_passtika.doc", "application/msword");
+ assertTypeByNameAndData("testPPT_protected_passtika.ppt", "application/vnd.ms-powerpoint");
+ }
+
+ /**
+ * Password Protected OOXML files are much more tricky beasts to work with.
+ * They have a very different structure to regular OOXML files, and instead
+ * of being ZIP based they are actually an OLE2 file which contains the
+ * OOXML structure within an encrypted stream.
+ * This makes detecting them much harder...
+ * <p>
+ * All three samples are expected to come back as the generic
+ * application/x-tika-ooxml-protected type, with or without a filename.
+ */
+ @Test
+ public void testDetectProtectedOOXML() throws Exception {
+ // Encrypted Microsoft Office OOXML files have OLE magic but
+ // special streams, so we can tell they're Protected OOXML
+ assertTypeByData("testEXCEL_protected_passtika.xlsx",
+ "application/x-tika-ooxml-protected");
+ assertTypeByData("testWORD_protected_passtika.docx",
+ "application/x-tika-ooxml-protected");
+ assertTypeByData("testPPT_protected_passtika.pptx",
+ "application/x-tika-ooxml-protected");
+
+ // At the moment, we can't use the name to specialise
+ // See discussions on TIKA-790 for details
+ assertTypeByNameAndData("testEXCEL_protected_passtika.xlsx",
+ "application/x-tika-ooxml-protected");
+ assertTypeByNameAndData("testWORD_protected_passtika.docx",
+ "application/x-tika-ooxml-protected");
+ assertTypeByNameAndData("testPPT_protected_passtika.pptx",
+ "application/x-tika-ooxml-protected");
+ }
+
+ /**
+ * Check that temporary files created by Tika are removed after
+ * closing TikaInputStream.
+ * Run once for an OOXML document and once for a plain zip archive.
+ */
+ @Test
+ public void testRemovalTempfiles() throws Exception {
+ assertRemovalTempfiles("testWORD.docx");
+ assertRemovalTempfiles("test-documents.zip");
+ }
+
+ /**
+ * Counts the entries directly inside the java.io.tmpdir directory
+ * whose names start with "apache-tika-".
+ *
+ * @return the number of matching entries, or 0 if the temporary
+ * directory could not be listed
+ */
+ private int countTemporaryFiles() {
+ File[] tikaTempFiles = new File(System.getProperty("java.io.tmpdir")).listFiles(
+ new FilenameFilter() {
+ @Override
+ public boolean accept(File dir, String name) {
+ return name.startsWith("apache-tika-");
+ }
+ });
+ // listFiles returns null, not an empty array, when the path is not
+ // a directory or an I/O error occurs
+ return tikaTempFiles == null ? 0 : tikaTempFiles.length;
+ }
+
+ private void assertRemovalTempfiles(String fileName) throws Exception {
+ int numberOfTempFiles = countTemporaryFiles();
+
+ try (TikaInputStream stream = TikaInputStream.get(
+ TestContainerAwareDetector.class.getResource("/test-documents/" + fileName))) {
+ detector.detect(stream, new Metadata());
+ }
+
+ assertEquals(numberOfTempFiles, countTemporaryFiles());
+ }
+
+ /** Checks that Keynote, Numbers and Pages documents are detected from their data alone. */
+ @Test
+ public void testDetectIWork() throws Exception {
+ assertTypeByData("testKeynote.key", "application/vnd.apple.keynote");
+ assertTypeByData("testNumbers.numbers", "application/vnd.apple.numbers");
+ assertTypeByData("testPages.pages", "application/vnd.apple.pages");
+ }
+
+ /** Checks that a KMZ file is detected from its data alone. */
+ @Test
+ public void testDetectKMZ() throws Exception {
+ assertTypeByData("testKMZ.kmz", "application/vnd.google-earth.kmz");
+ }
+
+ /** Checks that an iTunes IPA file is detected both with its filename and from its data alone. */
+ @Test
+ public void testDetectIPA() throws Exception {
+ assertTypeByNameAndData("testIPA.ipa", "application/x-itunes-ipa");
+ assertTypeByData("testIPA.ipa", "application/x-itunes-ipa");
+ }
+
+ /** Checks that ASiC-E and ASiC-S containers are detected from their data alone and with their filenames. */
+ @Test
+ public void testASiC() throws Exception {
+ assertTypeByData("testASiCE.asice", "application/vnd.etsi.asic-e+zip");
+ assertTypeByData("testASiCS.asics", "application/vnd.etsi.asic-s+zip");
+ assertTypeByNameAndData("testASiCE.asice", "application/vnd.etsi.asic-e+zip");
+ assertTypeByNameAndData("testASiCS.asics", "application/vnd.etsi.asic-s+zip");
+ }
+
+ /**
+ * Checks detection of plain zip archives and of JAR based formats
+ * from their data alone.
+ */
+ @Test
+ public void testDetectZip() throws Exception {
+ assertTypeByData("test-documents.zip", "application/zip");
+ assertTypeByData("test-zip-of-zip.zip", "application/zip");
+
+ // JAR based formats
+ assertTypeByData("testJAR.jar", "application/java-archive");
+ assertTypeByData("testWAR.war", "application/x-tika-java-web-archive");
+ assertTypeByData("testEAR.ear", "application/x-tika-java-enterprise-archive");
+ assertTypeByData("testAPK.apk", "application/vnd.android.package-archive");
+
+ // JAR with HTML files in it
+ // (the fourth argument makes this the one case here that also
+ // checks the mime repository's own detection result)
+ assertTypeByNameAndData("testJAR_with_HTML.jar", "testJAR_with_HTML.jar",
+ "application/java-archive", "application/java-archive");
+ }
+
+ /**
+ * Reads exactly the first n bytes of a test document and wraps them
+ * in a new in-memory TikaInputStream.
+ *
+ * @param name file name below /test-documents/
+ * @param n number of leading bytes to keep
+ * @return a stream over the first n bytes; the caller must close it
+ * @throws IOException if the document is missing from the classpath
+ * or is shorter than n bytes
+ */
+ private TikaInputStream getTruncatedFile(String name, int n)
+ throws IOException {
+ try (InputStream input = TestContainerAwareDetector.class.getResourceAsStream(
+ "/test-documents/" + name)) {
+ // getResourceAsStream signals a missing resource with null
+ if (input == null) {
+ throw new IOException("Test document not found: " + name);
+ }
+ byte[] bytes = new byte[n];
+ int m = 0;
+ // A single read may return fewer bytes than asked for, so keep
+ // reading until the buffer is full
+ while (m < bytes.length) {
+ int i = input.read(bytes, m, bytes.length - m);
+ if (i != -1) {
+ m += i;
+ } else {
+ throw new IOException("Unexpected end of stream");
+ }
+ }
+ return TikaInputStream.get(bytes);
+ }
+ }
+
+ /**
+ * Checks what is detected for truncated container files: without a
+ * filename only the generic container type is expected, with a
+ * filename the specific type is expected.
+ */
+ @Test
+ public void testTruncatedFiles() throws Exception {
+ // Truncated OOXML (zip) file, data only: the container is the best we can do
+ Metadata xlsxWithoutName = new Metadata();
+ try (TikaInputStream truncated = getTruncatedFile("testEXCEL.xlsx", 300)) {
+ MediaType detected = detector.detect(truncated, xlsxWithoutName);
+ assertEquals(MediaType.application("x-tika-ooxml"), detected);
+ }
+
+ // Truncated OOXML file plus filename: the filename lets us specialise
+ Metadata xlsxWithName = new Metadata();
+ xlsxWithName.add(Metadata.RESOURCE_NAME_KEY, "testEXCEL.xlsx");
+ try (TikaInputStream truncated = getTruncatedFile("testEXCEL.xlsx", 300)) {
+ MediaType detected = detector.detect(truncated, xlsxWithName);
+ assertEquals(
+ MediaType.application("vnd.openxmlformats-officedocument.spreadsheetml.sheet"),
+ detected);
+ }
+
+ // Truncated OLE2 file, data only
+ Metadata xlsWithoutName = new Metadata();
+ try (TikaInputStream truncated = getTruncatedFile("testEXCEL.xls", 400)) {
+ MediaType detected = detector.detect(truncated, xlsWithoutName);
+ assertEquals(MediaType.application("x-tika-msoffice"), detected);
+ }
+
+ // Truncated OLE2 file plus filename
+ Metadata xlsWithName = new Metadata();
+ xlsWithName.add(Metadata.RESOURCE_NAME_KEY, "testEXCEL.xls");
+ try (TikaInputStream truncated = getTruncatedFile("testEXCEL.xls", 400)) {
+ MediaType detected = detector.detect(truncated, xlsWithName);
+ assertEquals(MediaType.application("vnd.ms-excel"), detected);
+ }
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-app/src/test/java/org/apache/tika/embedder/ExternalEmbedderTest.java
----------------------------------------------------------------------
diff --git a/tika-app/src/test/java/org/apache/tika/embedder/ExternalEmbedderTest.java b/tika-app/src/test/java/org/apache/tika/embedder/ExternalEmbedderTest.java
new file mode 100644
index 0000000..45f68cc
--- /dev/null
+++ b/tika-app/src/test/java/org/apache/tika/embedder/ExternalEmbedderTest.java
@@ -0,0 +1,285 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.embedder;
+
+import static java.nio.charset.StandardCharsets.UTF_8;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+
+import java.io.ByteArrayOutputStream;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStreamWriter;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.StandardCopyOption;
+import java.text.DateFormat;
+import java.text.SimpleDateFormat;
+import java.util.Date;
+import java.util.HashMap;
+import java.util.Locale;
+import java.util.Map;
+
+import org.apache.tika.TikaTest;
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.io.TemporaryResources;
+import org.apache.tika.io.TikaInputStream;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.Property;
+import org.apache.tika.metadata.TikaCoreProperties;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.parser.Parser;
+import org.apache.tika.parser.txt.TXTParser;
+import org.apache.tika.sax.BodyContentHandler;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.junit.Test;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.SAXException;
+
+/**
+ * Unit test for {@link ExternalEmbedder}s.
+ */
+public class ExternalEmbedderTest extends TikaTest {
+
+ static Path TMP_TEST_TXT;
+ protected static final DateFormat EXPECTED_METADATA_DATE_FORMATTER =
+ new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss", Locale.ROOT);
+ protected static final String DEFAULT_CHARSET = UTF_8.name();
+ private static final String COMMAND_METADATA_ARGUMENT_DESCRIPTION = "dc:description";
+ private static final String TEST_TXT_PATH = "test-documents/testTXT.txt";
+
+ private TemporaryResources tmp = new TemporaryResources();
+
+ /**
+ * Copies the test text document from the classpath into a fresh
+ * temporary file, recorded in TMP_TEST_TXT.
+ *
+ * @throws Exception if the temp file cannot be created, the resource
+ * is missing, or the copy fails
+ */
+ @BeforeClass
+ public static void copyTestFile() throws Exception {
+ TMP_TEST_TXT = Files.createTempFile("tika-test", "");
+ // Files.copy leaves the input stream open, so close it here
+ try (InputStream testTxt = TikaTest.class.getClassLoader().getResourceAsStream(TEST_TXT_PATH)) {
+ if (testTxt == null) {
+ throw new FileNotFoundException("Test resource not found on classpath: " + TEST_TXT_PATH);
+ }
+ Files.copy(testTxt, TMP_TEST_TXT, StandardCopyOption.REPLACE_EXISTING);
+ }
+ }
+
+ /**
+ * Removes the temporary copy of the test document, if there is one.
+ * Tolerates a missing path or file so that a failure during setup
+ * is not hidden behind a second failure during cleanup.
+ */
+ @AfterClass
+ public static void rmTestFile() throws Exception {
+ if (TMP_TEST_TXT != null) {
+ Files.deleteIfExists(TMP_TEST_TXT);
+ }
+ }
+
+ /**
+ * Builds the value expected to be read back for the given field:
+ * the simple name of this test class, the field name, and the
+ * formatted timestamp.
+ *
+ * @param fieldName name of the metadata field being embedded
+ * @param timestamp time to format into the value
+ * @return "&lt;class&gt; embedded &lt;fieldName&gt; on &lt;timestamp&gt;"
+ */
+ protected String getExpectedMetadataValueString(String fieldName, Date timestamp) {
+ String testClassName = this.getClass().getSimpleName();
+ String formattedTimestamp = EXPECTED_METADATA_DATE_FORMATTER.format(timestamp);
+ return testClassName + " embedded " + fieldName + " on " + formattedTimestamp;
+ }
+
+ /**
+ * Creates the tika <code>Metadata</code> object holding the data to
+ * be embedded: a single description value derived from the timestamp.
+ *
+ * @param timestamp time to include in the description value
+ * @return the populated tika metadata object
+ */
+ protected Metadata getMetadataToEmbed(Date timestamp) {
+ Metadata toEmbed = new Metadata();
+ String descriptionFieldName = TikaCoreProperties.DESCRIPTION.toString();
+ String descriptionValue = getExpectedMetadataValueString(descriptionFieldName, timestamp);
+ toEmbed.add(TikaCoreProperties.DESCRIPTION, descriptionValue);
+ return toEmbed;
+ }
+
+ /**
+ * Creates the <code>Embedder</code> to test: an ExternalEmbedder whose
+ * only configured command argument maps the description property to
+ * "dc:description".
+ *
+ * @return the embedder under test
+ */
+ protected Embedder getEmbedder() {
+ String[] descriptionArguments = { COMMAND_METADATA_ARGUMENT_DESCRIPTION };
+ Map<Property, String[]> argumentsByProperty = new HashMap<Property, String[]>(1);
+ argumentsByProperty.put(TikaCoreProperties.DESCRIPTION, descriptionArguments);
+
+ ExternalEmbedder externalEmbedder = new ExternalEmbedder();
+ externalEmbedder.setMetadataCommandArguments(argumentsByProperty);
+ return externalEmbedder;
+ }
+
+ /**
+ * Gets the source input stream through standard Java resource loaders
+ * before metadata has been embedded.
+ * <p>
+ * TEST_TXT_PATH has no leading slash, so it is looked up through the
+ * class loader (relative to the classpath root), the same way
+ * copyTestFile() loads it. Class.getResourceAsStream would resolve
+ * it relative to this class's package instead.
+ *
+ * @return a fresh input stream, or null if the resource is not found
+ */
+ protected InputStream getSourceStandardInputStream() {
+ return this.getClass().getClassLoader().getResourceAsStream(TEST_TXT_PATH);
+ }
+
+ /**
+ * Opens the temporary copy of the test document as a
+ * {@link TikaInputStream}, before metadata has been embedded.
+ *
+ * @return a fresh input stream
+ * @throws IOException if the stream cannot be opened
+ */
+ protected InputStream getSourceTikaInputStream() throws IOException {
+ InputStream source = TikaInputStream.get(TMP_TEST_TXT);
+ return source;
+ }
+
+ /**
+ * Gets the parser to use to verify the result of the embed operation.
+ * This implementation returns a new TXTParser on every call.
+ *
+ * @return the parser to read embedded metadata
+ */
+ protected Parser getParser() {
+ return new TXTParser();
+ }
+
+ /**
+ * Whether or not the final result of reading the now embedded metadata is
+ * expected in the output of the external tool.
+ * This implementation always returns true.
+ *
+ * @return whether or not results are expected in command line output
+ */
+ protected boolean getIsMetadataExpectedInOutput() {
+ return true;
+ }
+
+ /**
+ * Tests embedding metadata then reading metadata to verify the results.
+ * <p>
+ * The metadata is embedded into a temporary file, which is then parsed
+ * again and checked for every value that was embedded. Does nothing
+ * when the os.name system property contains "Windows".
+ *
+ * @param sourceInputStream the document to embed metadata into; this
+ * method does not close it itself
+ * @param isResultExpectedInOutput whether or not results are expected in command line output
+ */
+ protected void embedInTempFile(InputStream sourceInputStream, boolean isResultExpectedInOutput) {
+ Embedder embedder = getEmbedder();
+
+ // TODO Move this check to ExternalEmbedder
+ String os = System.getProperty("os.name", "");
+ if (os.contains("Windows")) {
+ // Skip test on Windows
+ return;
+ }
+
+ Date timestamp = new Date();
+ Metadata metadataToEmbed = getMetadataToEmbed(timestamp);
+
+ try {
+ File tempOutputFile = tmp.createTemporaryFile();
+
+ // Embed the metadata into a copy of the original output stream.
+ // The stream is closed before the file is read back.
+ try (FileOutputStream tempFileOutputStream = new FileOutputStream(tempOutputFile)) {
+ embedder.embed(metadataToEmbed, sourceInputStream, tempFileOutputStream, null);
+ }
+
+ ParseContext context = new ParseContext();
+ Parser parser = getParser();
+ context.set(Parser.class, parser);
+
+ // Setup the extracting content handler
+ ByteArrayOutputStream result = new ByteArrayOutputStream();
+ OutputStreamWriter outputWriter = new OutputStreamWriter(result,DEFAULT_CHARSET);
+ ContentHandler handler = new BodyContentHandler(outputWriter);
+
+ // Create a new metadata object to read the new metadata into
+ Metadata embeddedMetadata = new Metadata();
+
+ // Re-read the now embedded temp file, closing it before the
+ // temporary resources are disposed of
+ try (FileInputStream embeddedFileInputStream = new FileInputStream(tempOutputFile)) {
+ parser.parse(embeddedFileInputStream, handler, embeddedMetadata,
+ context);
+ }
+
+ tmp.dispose();
+
+ String outputString = null;
+ if (isResultExpectedInOutput) {
+ outputString = result.toString(DEFAULT_CHARSET);
+ } else {
+ assertTrue("no metadata found", embeddedMetadata.size() > 0);
+ }
+
+ // Check each metadata property for the expected value
+ for (String metadataName : metadataToEmbed.names()) {
+ if (metadataToEmbed.get(metadataName) != null) {
+ String expectedValue = metadataToEmbed.get(metadataName);
+ boolean foundExpectedValue = false;
+ if (isResultExpectedInOutput) {
+ // just check that the entire output contains the expected string
+ foundExpectedValue = outputString.contains(expectedValue);
+ } else {
+ if (embeddedMetadata.isMultiValued(metadataName)) {
+ for (String embeddedValue : embeddedMetadata.getValues(metadataName)) {
+ if (embeddedValue != null) {
+ if (embeddedValue.contains(expectedValue)) {
+ foundExpectedValue = true;
+ break;
+ }
+ }
+ }
+ } else {
+ String embeddedValue = embeddedMetadata.get(metadataName);
+ assertNotNull("expected metadata for "
+ + metadataName + " not found",
+ embeddedValue);
+ foundExpectedValue = embeddedValue.contains(expectedValue);
+ }
+ }
+ assertTrue(
+ "result did not contain expected appended metadata "
+ + metadataName + "="
+ + expectedValue,
+ foundExpectedValue);
+ }
+ }
+ } catch (IOException | TikaException | SAXException e) {
+ // Still fails the test with an AssertionError, as fail() did,
+ // but keeps the original exception and its stack trace as the cause
+ throw new AssertionError(e.getMessage(), e);
+ }
+ }
+
+ protected void checkSourceFileExists() {
+ String message = "the original input file was deleted";
+ assertNotNull(message, TMP_TEST_TXT);
+ assertTrue(message, Files.isRegularFile(TMP_TEST_TXT));
+ }
+
+ /**
+ * Tests embedding using an input stream obtained via {@link ExternalEmbedderTest#getSourceStandardInputStream()}
+ * <p>
+ * The source stream is closed before the source file is checked.
+ *
+ * @throws IOException if closing the source stream fails
+ */
+ @Test
+ public void testEmbedStandardInputStream() throws IOException {
+ // try-with-resources skips a null resource, so a missing source
+ // is still handed on to embedInTempFile unchanged
+ try (InputStream source = getSourceStandardInputStream()) {
+ embedInTempFile(source, getIsMetadataExpectedInOutput());
+ }
+ checkSourceFileExists();
+ }
+
+ /**
+ * Tests embedding using an input stream obtained via {@link ExternalEmbedderTest#getSourceTikaInputStream()}
+ * <p>
+ * The source stream is closed before the source file is checked, so
+ * the check also covers anything that closing the stream might do
+ * to the file.
+ *
+ * @throws IOException if opening or closing the source stream fails
+ */
+ @Test
+ public void testEmbedTikaInputStream() throws IOException {
+ try (InputStream source = getSourceTikaInputStream()) {
+ embedInTempFile(source, getIsMetadataExpectedInOutput());
+ }
+ checkSourceFileExists();
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-app/src/test/java/org/apache/tika/mime/MimeTypeTest.java
----------------------------------------------------------------------
diff --git a/tika-app/src/test/java/org/apache/tika/mime/MimeTypeTest.java b/tika-app/src/test/java/org/apache/tika/mime/MimeTypeTest.java
new file mode 100644
index 0000000..447042b
--- /dev/null
+++ b/tika-app/src/test/java/org/apache/tika/mime/MimeTypeTest.java
@@ -0,0 +1,108 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.mime;
+
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+
+import org.apache.tika.mime.MimeType;
+import org.apache.tika.mime.MimeTypeException;
+import org.apache.tika.mime.MimeTypes;
+import org.junit.Before;
+import org.junit.Test;
+
+public class MimeTypeTest {
+
+ private MimeTypes types;
+ private MimeType text;
+
+ @Before
+ public void setUp() throws MimeTypeException {
+ types = new MimeTypes();
+ text = types.forName("text/plain");
+ }
+
+ /** Test MimeType constructor */
+ @Test
+ public void testConstrctor() {
+ // Missing name
+ try {
+ new MimeType(null);
+ fail("Expected IllegalArgumentException");
+ } catch (IllegalArgumentException e) {
+ // expected result
+ }
+ }
+
+ @Test
+ public void testIsValidName() {
+ assertTrue(MimeType.isValid("application/octet-stream"));
+ assertTrue(MimeType.isValid("text/plain"));
+ assertTrue(MimeType.isValid("foo/bar"));
+ assertTrue(MimeType.isValid("a/b"));
+
+ assertFalse(MimeType.isValid("application"));
+ assertFalse(MimeType.isValid("application/"));
+ assertFalse(MimeType.isValid("/"));
+ assertFalse(MimeType.isValid("/octet-stream"));
+ assertFalse(MimeType.isValid("application//octet-stream"));
+ assertFalse(MimeType.isValid("application/octet=stream"));
+ assertFalse(MimeType.isValid("application/\u00f6ctet-stream"));
+ assertFalse(MimeType.isValid("text/plain;"));
+ assertFalse(MimeType.isValid("text/plain; charset=UTF-8"));
+ try {
+ MimeType.isValid(null);
+ fail("Expected IllegalArgumentException");
+ } catch (IllegalArgumentException e) {
+ // expected result
+ }
+ }
+
+ /** Test MimeType setDescription() */
+ @Test
+ public void testSetEmptyValues() {
+ try {
+ text.setDescription(null);
+ fail("Expected IllegalArgumentException");
+ } catch (IllegalArgumentException e) {
+ // expected result
+ }
+
+ try {
+ text.setAcronym(null);
+ fail("Expected IllegalArgumentException");
+ } catch (IllegalArgumentException e) {
+ // expected result
+ }
+
+ try {
+ text.addLink(null);
+ fail("Expected IllegalArgumentException");
+ } catch (IllegalArgumentException e) {
+ // expected result
+ }
+
+ try {
+ text.setUniformTypeIdentifier(null);
+ fail("Expected IllegalArgumentException");
+ } catch (IllegalArgumentException e) {
+ // expected result
+ }
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-app/src/test/java/org/apache/tika/mime/MimeTypesTest.java
----------------------------------------------------------------------
diff --git a/tika-app/src/test/java/org/apache/tika/mime/MimeTypesTest.java b/tika-app/src/test/java/org/apache/tika/mime/MimeTypesTest.java
new file mode 100644
index 0000000..be8a575
--- /dev/null
+++ b/tika-app/src/test/java/org/apache/tika/mime/MimeTypesTest.java
@@ -0,0 +1,122 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.mime;
+
+import static org.apache.tika.mime.MediaType.OCTET_STREAM;
+import static org.apache.tika.mime.MediaType.TEXT_PLAIN;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+
+import org.junit.Before;
+import org.junit.Test;
+
+/**
+ * Tests for {@link MimeTypes} and the {@link MediaTypeRegistry} it exposes:
+ * name lookup, registration of looked-up types, the supertype hierarchy and
+ * {@link MimeType} ordering.
+ */
+public class MimeTypesTest {
+
+    /** Fresh registry created for every test by {@link #setUp()}. */
+    private MimeTypes types;
+
+    /** Media type registry obtained from {@link #types}. */
+    private MediaTypeRegistry registry;
+
+    /** The {@code application/octet-stream} type. */
+    private MimeType binary;
+
+    /** The {@code text/plain} type; {@code text/x-plain} is added as its alias. */
+    private MimeType text;
+
+    /** The {@code text/html} type; its supertype is set to {@code text/plain}. */
+    private MimeType html;
+
+    /**
+     * Builds the fixture: three types looked up by name, one alias for
+     * {@code text/plain}, and {@code text/plain} as the supertype of
+     * {@code text/html}.
+     *
+     * @throws MimeTypeException if one of the fixture names is rejected
+     */
+    @Before
+    public void setUp() throws MimeTypeException {
+        types = new MimeTypes();
+        registry = types.getMediaTypeRegistry();
+        binary = types.forName("application/octet-stream");
+        text = types.forName("text/plain");
+        types.addAlias(text, MediaType.parse("text/x-plain"));
+        html = types.forName("text/html");
+        types.setSuperType(html, TEXT_PLAIN);
+    }
+
+    /**
+     * Expects {@code forName} to return the same type regardless of the case
+     * of the name, and to throw {@link MimeTypeException} for a name without
+     * a slash.
+     */
+    @Test
+    public void testForName() throws MimeTypeException {
+        assertEquals(text, types.forName("text/plain"));
+        assertEquals(text, types.forName("TEXT/PLAIN"));
+
+        try {
+            types.forName("invalid");
+            fail("MimeTypeException not thrown on invalid type name");
+        } catch (MimeTypeException expected) {
+            // expected
+        }
+    }
+
+    /**
+     * Expects {@code getRegisteredMimeType} to return {@code null} for a name
+     * that has never been looked up, and to return the type once
+     * {@code forName} has been called for that name.
+     */
+    @Test
+    public void testRegisteredMimes() throws MimeTypeException {
+        String dummy = "text/xxxxx";
+        assertEquals(text, types.getRegisteredMimeType("text/plain"));
+
+        // Not known before the first forName() call ...
+        assertNull(types.getRegisteredMimeType(dummy));
+        // ... and available from both lookups afterwards.
+        assertNotNull(types.forName(dummy));
+        assertEquals(dummy, types.forName(dummy).getType().toString());
+        assertEquals(dummy, types.getRegisteredMimeType(dummy).getType().toString());
+
+        try {
+            types.forName("invalid");
+            fail("MimeTypeException not thrown on invalid type name");
+        } catch (MimeTypeException expected) {
+            // expected
+        }
+    }
+
+    /**
+     * Expects {@code application/octet-stream} to have no supertype,
+     * {@code text/plain} to have {@code application/octet-stream} as its
+     * supertype, and {@code text/html} to have the supertype assigned in
+     * {@link #setUp()}.
+     */
+    @Test
+    public void testSuperType() throws MimeTypeException {
+        assertNull(registry.getSupertype(OCTET_STREAM));
+        assertEquals(OCTET_STREAM, registry.getSupertype(TEXT_PLAIN));
+        assertEquals(TEXT_PLAIN, registry.getSupertype(html.getType()));
+    }
+
+    /**
+     * Expects {@code isSpecializationOf} to be false for a type compared with
+     * itself, true from a type to each of its ancestors, and false in the
+     * opposite direction.
+     */
+    @Test
+    public void testIsDescendantOf() {
+        // A type is not a specialization of itself.
+        assertFalse(registry.isSpecializationOf(OCTET_STREAM, OCTET_STREAM));
+        assertFalse(registry.isSpecializationOf(TEXT_PLAIN, TEXT_PLAIN));
+        assertFalse(registry.isSpecializationOf(html.getType(), html.getType()));
+
+        // text/html -> application/octet-stream (two levels up).
+        assertTrue(registry.isSpecializationOf(html.getType(), OCTET_STREAM));
+        assertFalse(registry.isSpecializationOf(OCTET_STREAM, html.getType()));
+
+        // text/html -> text/plain (direct supertype set in setUp()).
+        assertTrue(registry.isSpecializationOf(html.getType(), TEXT_PLAIN));
+        assertFalse(registry.isSpecializationOf(TEXT_PLAIN, html.getType()));
+
+        // text/plain -> application/octet-stream.
+        assertTrue(registry.isSpecializationOf(TEXT_PLAIN, OCTET_STREAM));
+        assertFalse(registry.isSpecializationOf(OCTET_STREAM, TEXT_PLAIN));
+    }
+
+    /**
+     * Expects {@code compareTo} to return zero for a type compared with
+     * itself and a non-zero value for every pair of distinct fixture types.
+     */
+    @Test
+    public void testCompareTo() {
+        // assertEquals reports the actual value if a reflexive comparison is non-zero.
+        assertEquals(0, binary.compareTo(binary));
+        assertTrue(binary.compareTo(text) != 0);
+        assertTrue(binary.compareTo(html) != 0);
+
+        assertTrue(text.compareTo(binary) != 0);
+        assertEquals(0, text.compareTo(text));
+        assertTrue(text.compareTo(html) != 0);
+
+        assertTrue(html.compareTo(binary) != 0);
+        assertTrue(html.compareTo(text) != 0);
+        assertEquals(0, html.compareTo(html));
+    }
+
+}