You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by mi...@apache.org on 2012/05/25 20:13:53 UTC

svn commit: r1342753 - in /tika/trunk: ./ tika-parsers/src/main/java/org/apache/tika/parser/iwork/ tika-parsers/src/test/java/org/apache/tika/parser/iwork/ tika-parsers/src/test/resources/test-documents/

Author: mikemccand
Date: Fri May 25 18:13:52 2012
New Revision: 1342753

URL: http://svn.apache.org/viewvc?rev=1342753&view=rev
Log:
TIKA-923: extract items from Keynote master slides too

Added:
    tika/trunk/tika-parsers/src/test/resources/test-documents/testMasterSlideTable.key   (with props)
Modified:
    tika/trunk/CHANGES.txt
    tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/iwork/KeynoteContentHandler.java
    tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/iwork/IWorkParserTest.java

Modified: tika/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/tika/trunk/CHANGES.txt?rev=1342753&r1=1342752&r2=1342753&view=diff
==============================================================================
--- tika/trunk/CHANGES.txt (original)
+++ tika/trunk/CHANGES.txt Fri May 25 18:13:52 2012
@@ -21,7 +21,8 @@ Release 1.2 - Current Development
     yet (TIKA-903).  Text extracted from Keynote text boxes and bullet
     points no longer runs together (TIKA-910). Also extract text for
     Pages documents created in layout mode (TIKA-904).  Table names
-    are now extracted in Numbers documents (TIKA-924).
+    are now extracted in Numbers documents (TIKA-924).  Content added
+    to master slides is also extracted (TIKA-923).
 
 Release 1.1 - 3/7/2012
 ---------------------------------

Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/iwork/KeynoteContentHandler.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/iwork/KeynoteContentHandler.java?rev=1342753&r1=1342752&r2=1342753&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/iwork/KeynoteContentHandler.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/iwork/KeynoteContentHandler.java Fri May 25 18:13:52 2012
@@ -67,6 +67,9 @@ class KeynoteContentHandler extends Defa
             inSlide = true;
             numberOfSlides++;
             xhtml.startElement("div");
+        } else if ("key:master-slide".equals(qName)) {
+            inSlide = true;
+            xhtml.startElement("div");
         } else if ("key:title-placeholder".equals(qName) && inSlide) {
             inTitle = true;
             xhtml.startElement("h1");
@@ -117,6 +120,9 @@ class KeynoteContentHandler extends Defa
         } else if ("key:slide".equals(qName)) {
             inSlide = false;
             xhtml.endElement("div");
+        } else if ("key:master-slide".equals(qName)) {
+            inSlide = false;
+            xhtml.endElement("div");
         } else if ("key:title-placeholder".equals(qName) && inSlide) {
             inTitle = false;
             xhtml.endElement("h1");

Modified: tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/iwork/IWorkParserTest.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/iwork/IWorkParserTest.java?rev=1342753&r1=1342752&r2=1342753&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/iwork/IWorkParserTest.java (original)
+++ tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/iwork/IWorkParserTest.java Fri May 25 18:13:52 2012
@@ -124,6 +124,20 @@ public class IWorkParserTest extends Tes
         assertTrue(content.contains("row 1 row 2 row 3"));
     }
 
+    // TIKA-923
+    public void testKeynoteMasterSlideTable() throws Exception {
+        InputStream input = IWorkParserTest.class.getResourceAsStream("/test-documents/testMasterSlideTable.key");
+        Metadata metadata = new Metadata();
+        ContentHandler handler = new BodyContentHandler();
+        iWorkParser.parse(input, handler, metadata, parseContext);
+
+        String content = handler.toString();
+        content = content.replaceAll("\\s+", " ");
+        assertTrue(content.contains("master row 1"));
+        assertTrue(content.contains("master row 2"));
+        assertTrue(content.contains("master row 3"));
+    }
+
     public void testParsePages() throws Exception {
         InputStream input = IWorkParserTest.class.getResourceAsStream("/test-documents/testPages.pages");
         Metadata metadata = new Metadata();

Added: tika/trunk/tika-parsers/src/test/resources/test-documents/testMasterSlideTable.key
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/resources/test-documents/testMasterSlideTable.key?rev=1342753&view=auto
==============================================================================
Binary file - no diff available.

Propchange: tika/trunk/tika-parsers/src/test/resources/test-documents/testMasterSlideTable.key
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream