You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ni...@apache.org on 2011/05/19 15:44:11 UTC
svn commit: r1124772 - in /tika/trunk:
tika-core/src/test/java/org/apache/tika/mime/
tika-core/src/test/java/org/apache/tika/parser/
tika-parsers/src/test/java/org/apache/tika/mime/
tika-parsers/src/test/java/org/apache/tika/parser/
Author: nick
Date: Thu May 19 13:44:10 2011
New Revision: 1124772
URL: http://svn.apache.org/viewvc?rev=1124772&view=rev
Log:
TIKA-660 Merge the two CompositeParserTests and PatternsTests into one each in core
Added:
tika/trunk/tika-core/src/test/java/org/apache/tika/parser/DummyParser.java
- copied unchanged from r1124691, tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/DummyParser.java
Removed:
tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/PatternsTest.java
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/CompositeParserTest.java
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/DummyParser.java
Modified:
tika/trunk/tika-core/src/test/java/org/apache/tika/mime/PatternsTest.java
tika/trunk/tika-core/src/test/java/org/apache/tika/parser/CompositeParserTest.java
Modified: tika/trunk/tika-core/src/test/java/org/apache/tika/mime/PatternsTest.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/test/java/org/apache/tika/mime/PatternsTest.java?rev=1124772&r1=1124771&r2=1124772&view=diff
==============================================================================
--- tika/trunk/tika-core/src/test/java/org/apache/tika/mime/PatternsTest.java (original)
+++ tika/trunk/tika-core/src/test/java/org/apache/tika/mime/PatternsTest.java Thu May 19 13:44:10 2011
@@ -21,11 +21,59 @@ import java.util.List;
import junit.framework.TestCase;
public class PatternsTest extends TestCase {
+ private MimeTypes fullTypes = MimeTypes.getDefaultMimeTypes();
- private MimeTypes types = MimeTypes.getDefaultMimeTypes();
+ private Patterns patterns;
+ private MimeTypes types;
+ private MimeType text;
+
+ protected void setUp() throws MimeTypeException {
+ patterns = new Patterns(new MediaTypeRegistry());
+ types = new MimeTypes();
+ text = types.forName("text/plain");
+ }
+
+ /** Test add() */
+ public void testAdd() throws MimeTypeException {
+ try {
+ patterns.add(null, text);
+ fail("Expected IllegalArgumentException");
+ } catch (IllegalArgumentException e) {
+ // expected result
+ }
+ try {
+ patterns.add("", null);
+ fail("Expected IllegalArgumentException");
+ } catch (IllegalArgumentException e) {
+ // expected result
+ }
+ try {
+ patterns.add(null, null);
+ fail("Expected IllegalArgumentException");
+ } catch (IllegalArgumentException e) {
+ // expected result
+ }
+ }
+
+ /** Test matches() */
+ public void testMatches() {
+ try {
+ patterns.matches(null);
+ fail("Expected IllegalArgumentException");
+ } catch (IllegalArgumentException e) {
+ // expected result
+ }
+ }
+
+ public void testExtension() throws MimeTypeException {
+ MimeType doc = types.forName("application/vnd.ms-word");
+ patterns.add("*.doc", doc);
+
+ assertEquals(".doc", doc.getExtension());
+ }
public void testExtensions() throws Exception{
- MimeType jpeg = types.forName("image/jpeg");
+ MimeType jpeg = fullTypes.forName("image/jpeg");
assertEquals(".jpg", jpeg.getExtension());
Modified: tika/trunk/tika-core/src/test/java/org/apache/tika/parser/CompositeParserTest.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/test/java/org/apache/tika/parser/CompositeParserTest.java?rev=1124772&r1=1124771&r2=1124772&view=diff
==============================================================================
--- tika/trunk/tika-core/src/test/java/org/apache/tika/parser/CompositeParserTest.java (original)
+++ tika/trunk/tika-core/src/test/java/org/apache/tika/parser/CompositeParserTest.java Thu May 19 13:44:10 2011
@@ -16,15 +16,23 @@
*/
package org.apache.tika.parser;
+import java.io.ByteArrayInputStream;
+import java.util.Arrays;
import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import junit.framework.TestCase;
+import org.apache.tika.config.TikaConfig;
+import org.apache.tika.metadata.Metadata;
import org.apache.tika.mime.MediaType;
import org.apache.tika.mime.MediaTypeRegistry;
+import org.apache.tika.sax.BodyContentHandler;
+import org.xml.sax.ContentHandler;
public class CompositeParserTest extends TestCase {
@@ -57,4 +65,86 @@ public class CompositeParserTest extends
assertEquals(b, parsers.get(1));
}
+ public void testDefaultParser() throws Exception {
+ TikaConfig config = TikaConfig.getDefaultConfig();
+
+ CompositeParser parser = (CompositeParser)config.getParser();
+
+ // Check it has the full registry
+ assertEquals(config.getMediaTypeRegistry(), parser.getMediaTypeRegistry());
+ }
+
+ public void testMimeTypeAliases() throws Exception {
+ MediaType bmpCanonical = MediaType.image("x-ms-bmp");
+ Map<String,String> bmpCanonicalMetadata = new HashMap<String, String>();
+ bmpCanonicalMetadata.put("BMP", "True");
+ bmpCanonicalMetadata.put("Canonical", "True");
+ Parser bmpCanonicalParser = new DummyParser(
+ new HashSet<MediaType>(Arrays.asList(bmpCanonical)),
+ bmpCanonicalMetadata, null
+ );
+
+ MediaType bmpAlias = MediaType.image("bmp");
+ Map<String,String> bmpAliasMetadata = new HashMap<String, String>();
+ bmpAliasMetadata.put("BMP", "True");
+ bmpAliasMetadata.put("Alias", "True");
+ Parser bmpAliasParser = new DummyParser(
+ new HashSet<MediaType>(Arrays.asList(bmpAlias)),
+ bmpAliasMetadata, null
+ );
+
+ TikaConfig config = TikaConfig.getDefaultConfig();
+ CompositeParser canonical = new CompositeParser(
+ config.getMediaTypeRegistry(), bmpCanonicalParser
+ );
+ CompositeParser alias = new CompositeParser(
+ config.getMediaTypeRegistry(), bmpAliasParser
+ );
+ CompositeParser both = new CompositeParser(
+ config.getMediaTypeRegistry(), bmpCanonicalParser, bmpAliasParser
+ );
+
+ ContentHandler handler = new BodyContentHandler();
+ Metadata metadata;
+
+ // Canonical and Canonical
+ metadata = new Metadata();
+ metadata.add(Metadata.CONTENT_TYPE, bmpCanonical.toString());
+ canonical.parse(new ByteArrayInputStream(new byte[0]), handler, metadata, new ParseContext());
+ assertEquals("True", metadata.get("BMP"));
+ assertEquals("True", metadata.get("Canonical"));
+
+
+ // Alias and Alias
+ metadata = new Metadata();
+ metadata.add(Metadata.CONTENT_TYPE, bmpAlias.toString());
+ alias.parse(new ByteArrayInputStream(new byte[0]), handler, metadata, new ParseContext());
+ assertEquals("True", metadata.get("BMP"));
+ assertEquals("True", metadata.get("Alias"));
+
+
+ // Alias type and Canonical parser
+ metadata = new Metadata();
+ metadata.add(Metadata.CONTENT_TYPE, bmpAlias.toString());
+ canonical.parse(new ByteArrayInputStream(new byte[0]), handler, metadata, new ParseContext());
+ assertEquals("True", metadata.get("BMP"));
+ assertEquals("True", metadata.get("Canonical"));
+
+
+ // Canonical type and Alias parser
+ metadata = new Metadata();
+ metadata.add(Metadata.CONTENT_TYPE, bmpCanonical.toString());
+ alias.parse(new ByteArrayInputStream(new byte[0]), handler, metadata, new ParseContext());
+ assertEquals("True", metadata.get("BMP"));
+ assertEquals("True", metadata.get("Alias"));
+
+
+ // And when both are there, will go for the last one
+ // to be registered (which is the alias one)
+ metadata = new Metadata();
+ metadata.add(Metadata.CONTENT_TYPE, bmpCanonical.toString());
+ both.parse(new ByteArrayInputStream(new byte[0]), handler, metadata, new ParseContext());
+ assertEquals("True", metadata.get("BMP"));
+ assertEquals("True", metadata.get("Alias"));
+ }
}