You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2016/09/26 19:02:22 UTC

[1/2] tika git commit: fix for TIKA-2098 contributed by alexshadow007

Repository: tika
Updated Branches:
  refs/heads/master 308d26fb2 -> 0a4b0e80b


fix for TIKA-2098 contributed by alexshadow007


Project: http://git-wip-us.apache.org/repos/asf/tika/repo
Commit: http://git-wip-us.apache.org/repos/asf/tika/commit/c33ac046
Tree: http://git-wip-us.apache.org/repos/asf/tika/tree/c33ac046
Diff: http://git-wip-us.apache.org/repos/asf/tika/diff/c33ac046

Branch: refs/heads/master
Commit: c33ac04618f97c06fe4508b5d41465b2c11ba1b9
Parents: ce07d8a
Author: Alexander Kazakov <al...@gmail.com>
Authored: Mon Sep 26 21:48:11 2016 +0300
Committer: Alexander Kazakov <al...@gmail.com>
Committed: Mon Sep 26 21:48:11 2016 +0300

----------------------------------------------------------------------
 .../src/main/java/org/apache/tika/parser/pdf/PDF2XHTML.java | 9 ++++-----
 .../test/java/org/apache/tika/parser/pdf/PDFParserTest.java | 9 +++++++++
 2 files changed, 13 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tika/blob/c33ac046/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDF2XHTML.java
----------------------------------------------------------------------
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDF2XHTML.java b/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDF2XHTML.java
index 34a3aff..5dd0680 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDF2XHTML.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDF2XHTML.java
@@ -122,6 +122,10 @@ class PDF2XHTML extends AbstractPDF2XHTML {
                 }
             });
 
+            if (pdf2XHTML.exceptions.size() > 0) {
+                //throw the first
+                throw pdf2XHTML.exceptions.get(0);
+            }
         } catch (IOException e) {
             if (e.getCause() instanceof SAXException) {
                 throw (SAXException) e.getCause();
@@ -129,11 +133,6 @@ class PDF2XHTML extends AbstractPDF2XHTML {
                 throw new TikaException("Unable to extract PDF content", e);
             }
         }
-        if (pdf2XHTML.exceptions.size() > 0) {
-            //throw the first
-            throw new TikaException("Unable to extract all PDF content",
-                    pdf2XHTML.exceptions.get(0));
-        }
     }
 
 

http://git-wip-us.apache.org/repos/asf/tika/blob/c33ac046/tika-parsers/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java
----------------------------------------------------------------------
diff --git a/tika-parsers/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java b/tika-parsers/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java
index 61b8ba2..5276f81 100644
--- a/tika-parsers/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java
+++ b/tika-parsers/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java
@@ -34,6 +34,7 @@ import org.apache.commons.io.IOUtils;
 import org.apache.log4j.Level;
 import org.apache.log4j.Logger;
 import org.apache.pdfbox.rendering.ImageType;
+import org.apache.tika.Tika;
 import org.apache.tika.TikaTest;
 import org.apache.tika.config.TikaConfig;
 import org.apache.tika.exception.AccessPermissionException;
@@ -1261,6 +1262,14 @@ public class PDFParserTest extends TikaTest {
         assertEquals("Sample Title", m.get(TikaCoreProperties.TITLE));
     }
 
+    @Test
+    public void testMaxLength() throws Exception {
+        InputStream is = getResourceAsStream("/test-documents/testPDF.pdf");
+        String content = new Tika().parseToString(is, new Metadata(), 100);
+
+        assertTrue(content.length() <= 100);
+    }
+
     private void assertException(String path, Parser parser, ParseContext context, Class expected) {
         boolean noEx = false;
         InputStream is = getResourceAsStream(path);


[2/2] tika git commit: Merge branch 'TIKA-2098' of https://github.com/alexshadow007/tika

Posted by ta...@apache.org.
Merge branch 'TIKA-2098' of https://github.com/alexshadow007/tika


Project: http://git-wip-us.apache.org/repos/asf/tika/repo
Commit: http://git-wip-us.apache.org/repos/asf/tika/commit/0a4b0e80
Tree: http://git-wip-us.apache.org/repos/asf/tika/tree/0a4b0e80
Diff: http://git-wip-us.apache.org/repos/asf/tika/diff/0a4b0e80

Branch: refs/heads/master
Commit: 0a4b0e80bad54e88c1f76cf8f37810757b1b34c9
Parents: 308d26f c33ac04
Author: tballison <ta...@mitre.org>
Authored: Mon Sep 26 14:59:59 2016 -0400
Committer: tballison <ta...@mitre.org>
Committed: Mon Sep 26 14:59:59 2016 -0400

----------------------------------------------------------------------
 .../src/main/java/org/apache/tika/parser/pdf/PDF2XHTML.java | 9 ++++-----
 .../test/java/org/apache/tika/parser/pdf/PDFParserTest.java | 9 +++++++++
 2 files changed, 13 insertions(+), 5 deletions(-)
----------------------------------------------------------------------