You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2014/06/17 18:05:44 UTC

svn commit: r1603208 - in /tika/trunk/tika-parsers/src: main/java/org/apache/tika/parser/pdf/PDFParser.java test/java/org/apache/tika/parser/pdf/PDFParserTest.java

Author: tallison
Date: Tue Jun 17 16:05:44 2014
New Revision: 1603208

URL: http://svn.apache.org/r1603208
Log:
TIKA-1341: fix double endDocument in PDFParser

Modified:
    tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParser.java
    tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java

Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParser.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParser.java?rev=1603208&r1=1603207&r2=1603208&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParser.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParser.java Tue Jun 17 16:05:44 2014
@@ -162,7 +162,6 @@ public class PDFParser extends AbstractP
             }
             tmp.dispose();
         }
-        handler.endDocument();
     }
 
    

Modified: tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java?rev=1603208&r1=1603207&r2=1603208&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java (original)
+++ tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java Tue Jun 17 16:05:44 2014
@@ -46,6 +46,7 @@ import org.apache.tika.parser.ParseConte
 import org.apache.tika.parser.Parser;
 import org.apache.tika.parser.PasswordProvider;
 import org.apache.tika.sax.BodyContentHandler;
+import org.apache.tika.sax.ContentHandlerDecorator;
 import org.junit.Test;
 import org.xml.sax.ContentHandler;
 /**
@@ -627,6 +628,7 @@ public class PDFParserTest extends TikaT
 */
 
     //TIKA-1226
+    @Test
     public void testSignatureInAcroForm() throws Exception {
         //The current test doc does not contain any content in the signature area.
         //This just tests that a RuntimeException is not thrown.
@@ -672,6 +674,21 @@ public class PDFParserTest extends TikaT
         assertEquals(TYPE_DOC.toString(), metadatas.get(3).get(Metadata.CONTENT_TYPE));
     }
 
+
+    @Test
+    public void testSingleCloseDoc() throws Exception {
+        //TIKA-1341: parsing this PDF must produce exactly one endDocument event
+        InputStream is = PDFParserTest.class.getResourceAsStream(
+                "/test-documents/testPDFTripleLangTitle.pdf");
+        Parser p = new AutoDetectParser();
+        Metadata m = new Metadata();
+        ParseContext c = new ParseContext();
+        ContentHandler h = new EventCountingHandler();
+        p.parse(is, h,  m,  c);
+        assertEquals(1, ((EventCountingHandler)h).getEndDocument());
+    }
+
+    @Test
     public void testVersions() throws Exception {
         
         Map<String, String> dcFormat = new HashMap<String, String>();
@@ -976,6 +993,25 @@ public class PDFParserTest extends TikaT
     }
 
 
+    /**
+     * Simple handler that counts end-of-document events so a test can assert
+     * how many were fired.  The endDocument() override only increments the
+     * counter; it does not pass the event on.  If this functionality is
+     * useful, move it to org.apache.tika in src/test.
+     */
+    private class EventCountingHandler extends ContentHandlerDecorator {
+        private int endDocument = 0;
+        
+        @Override
+        public void endDocument() {
+            endDocument++;
+        }
+        
+        public int getEndDocument() {
+            return endDocument;
+        }
+    }
+
     private class AvoidInlineSelector implements DocumentSelector {
 
         @Override