You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2017/10/24 00:37:46 UTC

[tika] branch master updated (96a3502 -> 877d621)

This is an automated email from the ASF dual-hosted git repository.

tallison pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/tika.git.


    from 96a3502  update some unit tests to use the RecursiveParserWrapper
     add 4de0c66  TIKA-2455: flag the containing multipart type
     add 33da38e  TIKA-2455: test for feature; only store multipart subtype in metadata
     add b5f5403  Merge branch 'master' into patch-2
     new a01163d  Merge branch 'mattcg-patch-2'
     new 877d621  update a unit test to use the RecursiveParserWrapper. This closes 205.

The 2 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 .../java/org/apache/tika/metadata/Message.java     |  4 ++++
 .../tika/parser/mail/MailContentHandler.java       | 19 +++++++++++++++++
 .../apache/tika/parser/mail/RFC822ParserTest.java  | 24 ++++++++++++++++++++++
 3 files changed, 47 insertions(+)

-- 
To stop receiving notification emails like this one, please contact
"commits@tika.apache.org" <co...@tika.apache.org>.

[tika] 01/02: Merge branch 'mattcg-patch-2'

Posted by ta...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/tika.git

commit a01163de9784cff77733262dad46140e83eb6e11
Merge: 96a3502 b5f5403
Author: tballison <ta...@mitre.org>
AuthorDate: Mon Oct 23 20:32:24 2017 -0400

    Merge branch 'mattcg-patch-2'

 .../java/org/apache/tika/metadata/Message.java     |  4 ++
 .../tika/parser/mail/MailContentHandler.java       | 19 ++++++++
 .../apache/tika/parser/mail/RFC822ParserTest.java  | 55 ++++++++++++++++++++++
 3 files changed, 78 insertions(+)

-- 
To stop receiving notification emails like this one, please contact
"commits@tika.apache.org" <co...@tika.apache.org>.

[tika] 02/02: update a unit test to use the RecursiveParserWrapper. This closes 205.

Posted by ta...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/tika.git

commit 877d62125dc6c670351eb84fc3422ed347db457a
Author: tballison <ta...@mitre.org>
AuthorDate: Mon Oct 23 20:37:34 2017 -0400

    update a unit test to use the RecursiveParserWrapper. This closes 205.
---
 .../apache/tika/parser/mail/RFC822ParserTest.java  | 55 +++++-----------------
 1 file changed, 12 insertions(+), 43 deletions(-)

diff --git a/tika-parsers/src/test/java/org/apache/tika/parser/mail/RFC822ParserTest.java b/tika-parsers/src/test/java/org/apache/tika/parser/mail/RFC822ParserTest.java
index 5f93961..ce1d7f9 100644
--- a/tika-parsers/src/test/java/org/apache/tika/parser/mail/RFC822ParserTest.java
+++ b/tika-parsers/src/test/java/org/apache/tika/parser/mail/RFC822ParserTest.java
@@ -577,56 +577,25 @@ public class RFC822ParserTest extends TikaTest {
 
     @Test
     public void testMultipartFlags() throws Exception {
-        final ContentHandler handler = new BodyContentHandler();
-        final Metadata metadata = new Metadata();
-        final Parser parser = new RFC822Parser();
-        final ParseContext context = new ParseContext();
-        final Parser autoDetectParser = new AutoDetectParser();
-
-        final List<Metadata> metadataList = new ArrayList<Metadata>();
-
-        context.set(EmbeddedDocumentExtractor.class, new EmbeddedDocumentExtractor() {
-
-            @Override
-            public boolean shouldParseEmbedded(Metadata metadata) {
-                return true;
-            }
-
-            @Override
-            public void parseEmbedded(InputStream stream, ContentHandler handler,
-                                      Metadata metadata, boolean outputHtml) throws SAXException,
-                    IOException {
-                try {
-                    autoDetectParser.parse(stream, new BodyContentHandler(), metadata, new ParseContext());
-                } catch (TikaException e) {
-                    throw new RuntimeException(e);
-                }
-
-                metadataList.add(metadata);
-            }
-        });
-
-        try (InputStream stream = getStream("test-documents/testRFC822-multipart")) {
-            parser.parse(stream, handler, metadata, context);
-        }
 
+        List<Metadata> metadataList = getRecursiveMetadata("testRFC822-multipart");
         // Check the root metadata.
-        assertTrue(metadata.get(Message.MULTIPART_SUBTYPE).equals("mixed"));
-        assertTrue(metadata.get(Message.MULTIPART_BOUNDARY).equals("0016e64606800312ee04913db790"));
+        assertEquals("mixed", metadataList.get(0).get(Message.MULTIPART_SUBTYPE));
+        assertEquals("0016e64606800312ee04913db790", metadataList.get(0).get(Message.MULTIPART_BOUNDARY));
 
         // Check the metadata of the first alternative.
-        assertTrue(metadataList.get(0).get(Metadata.CONTENT_TYPE).equals("text/plain; charset=UTF-8"));
-        assertTrue(metadataList.get(0).get(Message.MULTIPART_SUBTYPE).equals("alternative"));
-        assertTrue(metadataList.get(0).get(Message.MULTIPART_BOUNDARY).equals("0016e64606800312ea04913db78e"));
-
-        // Check the metadata of the second alternative.
-        assertTrue(metadataList.get(1).get(Metadata.CONTENT_TYPE).equals("text/html; charset=UTF-8"));
+        assertTrue(metadataList.get(1).get(Metadata.CONTENT_TYPE).equals("text/plain; charset=UTF-8"));
         assertTrue(metadataList.get(1).get(Message.MULTIPART_SUBTYPE).equals("alternative"));
         assertTrue(metadataList.get(1).get(Message.MULTIPART_BOUNDARY).equals("0016e64606800312ea04913db78e"));
 
+        // Check the metadata of the second alternative.
+        assertTrue(metadataList.get(2).get(Metadata.CONTENT_TYPE).equals("text/html; charset=UTF-8"));
+        assertTrue(metadataList.get(2).get(Message.MULTIPART_SUBTYPE).equals("alternative"));
+        assertTrue(metadataList.get(2).get(Message.MULTIPART_BOUNDARY).equals("0016e64606800312ea04913db78e"));
+
         // Check the metadata of the attached GIF.
-        assertTrue(metadataList.get(2).get(Metadata.CONTENT_TYPE).equals("image/gif"));
-        assertTrue(metadataList.get(2).get(Message.MULTIPART_SUBTYPE) == null);
-        assertTrue(metadataList.get(2).get(Message.MULTIPART_BOUNDARY) == null);
+        assertTrue(metadataList.get(3).get(Metadata.CONTENT_TYPE).equals("image/gif"));
+        assertTrue(metadataList.get(3).get(Message.MULTIPART_SUBTYPE) == null);
+        assertTrue(metadataList.get(3).get(Message.MULTIPART_BOUNDARY) == null);
     }
 }

-- 
To stop receiving notification emails like this one, please contact
"commits@tika.apache.org" <co...@tika.apache.org>.