You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2014/06/17 18:05:44 UTC
svn commit: r1603208 - in /tika/trunk/tika-parsers/src:
main/java/org/apache/tika/parser/pdf/PDFParser.java
test/java/org/apache/tika/parser/pdf/PDFParserTest.java
Author: tallison
Date: Tue Jun 17 16:05:44 2014
New Revision: 1603208
URL: http://svn.apache.org/r1603208
Log:
TIKA-1341: fix double endDocument in PDFParser
Modified:
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParser.java
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java
Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParser.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParser.java?rev=1603208&r1=1603207&r2=1603208&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParser.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParser.java Tue Jun 17 16:05:44 2014
@@ -162,7 +162,6 @@ public class PDFParser extends AbstractP
}
tmp.dispose();
}
- handler.endDocument();
}
Modified: tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java?rev=1603208&r1=1603207&r2=1603208&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java (original)
+++ tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java Tue Jun 17 16:05:44 2014
@@ -46,6 +46,7 @@ import org.apache.tika.parser.ParseConte
import org.apache.tika.parser.Parser;
import org.apache.tika.parser.PasswordProvider;
import org.apache.tika.sax.BodyContentHandler;
+import org.apache.tika.sax.ContentHandlerDecorator;
import org.junit.Test;
import org.xml.sax.ContentHandler;
/**
@@ -627,6 +628,7 @@ public class PDFParserTest extends TikaT
*/
//TIKA-1226
+ @Test
public void testSignatureInAcroForm() throws Exception {
//The current test doc does not contain any content in the signature area.
//This just tests that a RuntimeException is not thrown.
@@ -672,6 +674,21 @@ public class PDFParserTest extends TikaT
assertEquals(TYPE_DOC.toString(), metadatas.get(3).get(Metadata.CONTENT_TYPE));
}
+
+ @Test
+ public void testSingleCloseDoc() throws Exception {
+ //TIKA-1341
+ InputStream is = PDFParserTest.class.getResourceAsStream(
+ "/test-documents/testPDFTripleLangTitle.pdf");
+ Parser p = new AutoDetectParser();
+ Metadata m = new Metadata();
+ ParseContext c = new ParseContext();
+ ContentHandler h = new EventCountingHandler();
+ p.parse(is, h, m, c);
+ assertEquals(1, ((EventCountingHandler)h).getEndDocument());
+ }
+
+ @Test
public void testVersions() throws Exception {
Map<String, String> dcFormat = new HashMap<String, String>();
@@ -976,6 +993,25 @@ public class PDFParserTest extends TikaT
}
+ /**
+ *
+ * Simple class to count end of document events. If functionality is useful,
+ * move to org.apache.tika in src/test
+ *
+ */
+ private class EventCountingHandler extends ContentHandlerDecorator {
+ private int endDocument = 0;
+
+ @Override
+ public void endDocument() {
+ endDocument++;
+ }
+
+ public int getEndDocument() {
+ return endDocument;
+ }
+ }
+
private class AvoidInlineSelector implements DocumentSelector {
@Override