You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ni...@apache.org on 2011/03/14 21:26:37 UTC
svn commit: r1081547 - in /tika/trunk:
tika-core/src/main/resources/org/apache/tika/mime/
tika-parsers/src/test/java/org/apache/tika/parser/microsoft/
tika-parsers/src/test/resources/test-documents/
Author: nick
Date: Mon Mar 14 20:26:36 2011
New Revision: 1081547
URL: http://svn.apache.org/viewvc?rev=1081547&view=rev
Log:
Fix the MIME magic detection of TNEF files, and add a unit test for it. (The rest of the TNEF support will be committed when POI 3.8 beta 2 is out.) (TIKA-615)
Added:
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/TNEFParserTest.java
tika/trunk/tika-parsers/src/test/resources/test-documents/testWINMAIL.dat (with props)
Modified:
tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/AbstractPOIContainerExtractionTest.java
Modified: tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml?rev=1081547&r1=1081546&r2=1081547&view=diff
==============================================================================
--- tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml (original)
+++ tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml Mon Mar 14 20:26:36 2011
@@ -1291,7 +1291,7 @@
<mime-type type="application/vnd.ms-tnef">
<alias type="application/ms-tnef" />
<magic priority="50">
- <match value="0x223e9f78" type="little16" offset="0" />
+ <match value="0x223e9f78" type="little32" offset="0" />
</magic>
</mime-type>
Modified: tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/AbstractPOIContainerExtractionTest.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/AbstractPOIContainerExtractionTest.java?rev=1081547&r1=1081546&r2=1081547&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/AbstractPOIContainerExtractionTest.java (original)
+++ tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/AbstractPOIContainerExtractionTest.java Mon Mar 14 20:26:36 2011
@@ -49,13 +49,7 @@ public abstract class AbstractPOIContain
public static final MediaType TYPE_EMF = MediaType.application("x-msmetafile");
protected TrackingHandler process(String filename, ContainerExtractor extractor, boolean recurse) throws Exception {
- InputStream input = AbstractPOIContainerExtractionTest.class.getResourceAsStream(
- "/test-documents/" + filename);
- assertNotNull(filename + " not found", input);
-
- TikaInputStream stream = TikaInputStream.get(input);
- assertNotNull(stream);
-
+ TikaInputStream stream = getTestFile(filename);
assertEquals(true, extractor.isSupported(stream));
// Process it
@@ -70,6 +64,17 @@ public abstract class AbstractPOIContain
return handler;
}
+ protected TikaInputStream getTestFile(String filename) throws Exception {
+ InputStream input = AbstractPOIContainerExtractionTest.class.getResourceAsStream(
+ "/test-documents/" + filename);
+ assertNotNull(filename + " not found", input);
+
+ TikaInputStream stream = TikaInputStream.get(input);
+ assertNotNull(stream);
+
+ return stream;
+ }
+
protected static class TrackingHandler implements EmbeddedResourceHandler {
public List<String> filenames = new ArrayList<String>();
public List<MediaType> mediaTypes = new ArrayList<MediaType>();
Added: tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/TNEFParserTest.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/TNEFParserTest.java?rev=1081547&view=auto
==============================================================================
--- tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/TNEFParserTest.java (added)
+++ tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/TNEFParserTest.java Mon Mar 14 20:26:36 2011
@@ -0,0 +1,44 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.microsoft;
+
+import org.apache.tika.detect.ContainerAwareDetector;
+import org.apache.tika.io.TikaInputStream;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.mime.MediaType;
+import org.apache.tika.mime.MimeTypes;
+
+/**
+ * Tests for the TNEF (winmail.dat) parser
+ */
+public class TNEFParserTest extends AbstractPOIContainerExtractionTest {
+ private static final String file = "testWINMAIL.dat";
+
+ public void testBasics() throws Exception {
+ TikaInputStream stream = getTestFile(file);
+ ContainerAwareDetector detector =
+ new ContainerAwareDetector(MimeTypes.getDefaultMimeTypes());
+
+ try {
+ assertEquals(
+ MediaType.application("vnd.ms-tnef"),
+ detector.detect(stream, new Metadata()));
+ } finally {
+ stream.close();
+ }
+ }
+}
Added: tika/trunk/tika-parsers/src/test/resources/test-documents/testWINMAIL.dat
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/resources/test-documents/testWINMAIL.dat?rev=1081547&view=auto
==============================================================================
Binary file - no diff available.
Propchange: tika/trunk/tika-parsers/src/test/resources/test-documents/testWINMAIL.dat
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream