You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2016/09/26 19:02:22 UTC
[1/2] tika git commit: fix for TIKA-2098 contributed by alexshadow007
Repository: tika
Updated Branches:
refs/heads/master 308d26fb2 -> 0a4b0e80b
fix for TIKA-2098 contributed by alexshadow007
Project: http://git-wip-us.apache.org/repos/asf/tika/repo
Commit: http://git-wip-us.apache.org/repos/asf/tika/commit/c33ac046
Tree: http://git-wip-us.apache.org/repos/asf/tika/tree/c33ac046
Diff: http://git-wip-us.apache.org/repos/asf/tika/diff/c33ac046
Branch: refs/heads/master
Commit: c33ac04618f97c06fe4508b5d41465b2c11ba1b9
Parents: ce07d8a
Author: Alexander Kazakov <al...@gmail.com>
Authored: Mon Sep 26 21:48:11 2016 +0300
Committer: Alexander Kazakov <al...@gmail.com>
Committed: Mon Sep 26 21:48:11 2016 +0300
----------------------------------------------------------------------
.../src/main/java/org/apache/tika/parser/pdf/PDF2XHTML.java | 9 ++++-----
.../test/java/org/apache/tika/parser/pdf/PDFParserTest.java | 9 +++++++++
2 files changed, 13 insertions(+), 5 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/tika/blob/c33ac046/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDF2XHTML.java
----------------------------------------------------------------------
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDF2XHTML.java b/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDF2XHTML.java
index 34a3aff..5dd0680 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDF2XHTML.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDF2XHTML.java
@@ -122,6 +122,10 @@ class PDF2XHTML extends AbstractPDF2XHTML {
}
});
+ if (pdf2XHTML.exceptions.size() > 0) {
+ //throw the first
+ throw pdf2XHTML.exceptions.get(0);
+ }
} catch (IOException e) {
if (e.getCause() instanceof SAXException) {
throw (SAXException) e.getCause();
@@ -129,11 +133,6 @@ class PDF2XHTML extends AbstractPDF2XHTML {
throw new TikaException("Unable to extract PDF content", e);
}
}
- if (pdf2XHTML.exceptions.size() > 0) {
- //throw the first
- throw new TikaException("Unable to extract all PDF content",
- pdf2XHTML.exceptions.get(0));
- }
}
http://git-wip-us.apache.org/repos/asf/tika/blob/c33ac046/tika-parsers/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java
----------------------------------------------------------------------
diff --git a/tika-parsers/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java b/tika-parsers/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java
index 61b8ba2..5276f81 100644
--- a/tika-parsers/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java
+++ b/tika-parsers/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java
@@ -34,6 +34,7 @@ import org.apache.commons.io.IOUtils;
import org.apache.log4j.Level;
import org.apache.log4j.Logger;
import org.apache.pdfbox.rendering.ImageType;
+import org.apache.tika.Tika;
import org.apache.tika.TikaTest;
import org.apache.tika.config.TikaConfig;
import org.apache.tika.exception.AccessPermissionException;
@@ -1261,6 +1262,14 @@ public class PDFParserTest extends TikaTest {
assertEquals("Sample Title", m.get(TikaCoreProperties.TITLE));
}
+ @Test
+ public void testMaxLength() throws Exception {
+ InputStream is = getResourceAsStream("/test-documents/testPDF.pdf");
+ String content = new Tika().parseToString(is, new Metadata(), 100);
+
+ assertTrue(content.length() <= 100);
+ }
+
private void assertException(String path, Parser parser, ParseContext context, Class expected) {
boolean noEx = false;
InputStream is = getResourceAsStream(path);
[2/2] tika git commit: Merge branch 'TIKA-2098' of https://github.com/alexshadow007/tika
Posted by ta...@apache.org.
Merge branch 'TIKA-2098' of https://github.com/alexshadow007/tika
Project: http://git-wip-us.apache.org/repos/asf/tika/repo
Commit: http://git-wip-us.apache.org/repos/asf/tika/commit/0a4b0e80
Tree: http://git-wip-us.apache.org/repos/asf/tika/tree/0a4b0e80
Diff: http://git-wip-us.apache.org/repos/asf/tika/diff/0a4b0e80
Branch: refs/heads/master
Commit: 0a4b0e80bad54e88c1f76cf8f37810757b1b34c9
Parents: 308d26f c33ac04
Author: tballison <ta...@mitre.org>
Authored: Mon Sep 26 14:59:59 2016 -0400
Committer: tballison <ta...@mitre.org>
Committed: Mon Sep 26 14:59:59 2016 -0400
----------------------------------------------------------------------
.../src/main/java/org/apache/tika/parser/pdf/PDF2XHTML.java | 9 ++++-----
.../test/java/org/apache/tika/parser/pdf/PDFParserTest.java | 9 +++++++++
2 files changed, 13 insertions(+), 5 deletions(-)
----------------------------------------------------------------------