You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by rg...@apache.org on 2012/12/12 00:30:19 UTC
svn commit: r1420484 - in /tika/trunk:
tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
tika-parsers/src/test/java/org/apache/tika/detect/TestContainerAwareDetector.java
tika-parsers/src/test/resources/test-documents/testLotusEml.eml
Author: rgauss
Date: Tue Dec 11 23:30:18 2012
New Revision: 1420484
URL: http://svn.apache.org/viewvc?rev=1420484&view=rev
Log:
TIKA-1042: Lotus Notes .eml Files Not Always Detected Properly
- Added testLotusEml.eml which demonstrates the problem (with some info redacted)
- Added testDetectLotusNotesEml method to TestContainerAwareDetector
- Added a new nested magic match to the message/rfc822 mime-type which looks for X-Notes-Item: at offset 0 followed by Message-ID: within the first 8192 bytes
Added:
tika/trunk/tika-parsers/src/test/resources/test-documents/testLotusEml.eml
Modified:
tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
tika/trunk/tika-parsers/src/test/java/org/apache/tika/detect/TestContainerAwareDetector.java
Modified: tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml?rev=1420484&r1=1420483&r2=1420484&view=diff
==============================================================================
--- tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml (original)
+++ tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml Tue Dec 11 23:30:18 2012
@@ -4198,8 +4198,11 @@
<match value="Return-Path:" type="string" offset="0"/>
<match value="From:" type="string" offset="0"/>
<match value="Received:" type="string" offset="0"/>
- <match type="string" value="Message-ID:" offset="0"/>
- <match type="string" value="Date:" offset="0"/>
+ <match value="Message-ID:" type="string" offset="0"/>
+ <match value="Date:" type="string" offset="0"/>
+ <match value="X-Notes-Item:" type="string" offset="0">
+ <match value="Message-ID:" type="string" offset="0:8192"/>
+ </match>
</magic>
<glob pattern="*.eml"/>
<glob pattern="*.mime"/>
Modified: tika/trunk/tika-parsers/src/test/java/org/apache/tika/detect/TestContainerAwareDetector.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/detect/TestContainerAwareDetector.java?rev=1420484&r1=1420483&r2=1420484&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/test/java/org/apache/tika/detect/TestContainerAwareDetector.java (original)
+++ tika/trunk/tika-parsers/src/test/java/org/apache/tika/detect/TestContainerAwareDetector.java Tue Dec 11 23:30:18 2012
@@ -163,6 +163,12 @@ public class TestContainerAwareDetector
assertTypeByData("testEPUB.epub", "application/epub+zip");
assertTypeByData("testiBooks.ibooks", "application/x-ibooks+zip");
}
+
+ public void testDetectLotusNotesEml() throws Exception {
+ // Lotus .eml files aren't guaranteed to have any of the magic
+ // matches as the first line, but should have X-Notes-Item and Message-ID
+ assertTypeByData("testLotusEml.eml", "message/rfc822");
+ }
public void testDetectODF() throws Exception {
assertTypeByData("testODFwithOOo3.odt", "application/vnd.oasis.opendocument.text");
Added: tika/trunk/tika-parsers/src/test/resources/test-documents/testLotusEml.eml
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/resources/test-documents/testLotusEml.eml?rev=1420484&view=auto
==============================================================================
--- tika/trunk/tika-parsers/src/test/resources/test-documents/testLotusEml.eml (added)
+++ tika/trunk/tika-parsers/src/test/resources/test-documents/testLotusEml.eml Tue Dec 11 23:30:18 2012
@@ -0,0 +1,71 @@
+X-Notes-Item: CN=XXXX/OU=XX/OU=XXXX/O=XXX/C=XX;
+ flags=44; name=ForwardedFrom
+X-Notes-Item: Thu, 4 Oct 2012 13:01:56 +0200;
+ type=400; name=ForwardedDate
+X-Notes-Item: Thu, 4 Oct 2012 13:02:39 +0200;
+ type=400; name=OriginalModTime
+X-Notes-Item: .;
+ name=$StorageTo
+X-Notes-Item: 62AEA923:7CEE804A-C1257A8D:003C970D;
+ type=4; name=$Orig
+X-Notes-Item: 2031619;
+ name=MIMEMailHeaderCharset
+MIME-Version: 1.0
+X-Mailer: Lotus Notes Release 8.5.3 September 15, 2011
+Message-ID: <OF...@LocalDomain>
+Date: Thu, 4 Oct 2012 13:02:39 +0200
+X-Notes-Item: 0;
+ name=MAILOPTIONS
+X-Notes-Item: 1;
+ name=SaveOptions
+X-Notes-Item: Memo;
+ name=Form
+From: XXX.XXX@XXXX.de
+X-Notes-Item: CN=XXXX/OU=XX/OU=XXXX/O=XXX/C=XX;
+ name=AltFrom
+X-Notes-Item: StdNotesLtr25;
+ name=Logo
+X-Notes-Item: StdNotesLtr25;
+ name=dLogo
+X-Notes-Item: True;
+ name=useApplet
+X-Notes-Item: 1;
+ name=DefaultMailSaveOptions
+X-Notes-Item: ;
+ name=Query_String
+X-Notes-Item: 1;
+ name=ExpandPersonalGroups
+To: XXXX@alfresco.com
+X-Notes-Item: ;
+ flags=44; name=INetCopyTo
+X-Notes-Item: ;
+ flags=44; name=INetBlindCopyTo
+X-Notes-Item: ;
+ name=tmpImp
+X-Notes-Item: ;
+ name=Sign
+X-Notes-Item: ;
+ name=Encrypt
+X-Notes-Item: ;
+ name=tmpClassification
+X-Notes-Item: ;
+ name=SetClassification
+X-Notes-Item: 1;
+ name=$NoteHasNativeMIME
+X-MIMETrack: Serialize by Notes Client on XXXX(Release
+ 8.5.3|September 15, 2011) at 25.10.2012 12:14:50
+Content-type: multipart/related;
+ Boundary="0__=4EBBF01EDFAF119D8f9e8a93df938690918c4EBBF01EDFAF119D"
+Content-Disposition: inline
+
+--0__=4EBBF01EDFAF119D8f9e8a93df938690918c4EBBF01EDFAF119D
+Content-type: text/html; charset=ISO-8859-1
+Content-Disposition: inline
+Content-transfer-encoding: quoted-printable
+
+<html><body>
+Message body
+</body></html>=
+
+--0__=4EBBF01EDFAF119D8f9e8a93df938690918c4EBBF01EDFAF119D--
+