You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2014/05/12 17:14:09 UTC
svn commit: r1593996 - in /tika/trunk: CHANGES.txt
tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDF2XHTML.java
Author: tallison
Date: Mon May 12 15:14:09 2014
New Revision: 1593996
URL: http://svn.apache.org/r1593996
Log:
TIKA-1231: added null checks for the embedded file specification and its embedded file (null entries are skipped silently), following the underlying fix made in PDFBox 1.8.5
Modified:
tika/trunk/CHANGES.txt
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDF2XHTML.java
Modified: tika/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/tika/trunk/CHANGES.txt?rev=1593996&r1=1593995&r2=1593996&view=diff
==============================================================================
--- tika/trunk/CHANGES.txt (original)
+++ tika/trunk/CHANGES.txt Mon May 12 15:14:09 2014
@@ -1,5 +1,7 @@
Release 1.6 - ??/??/2014
+ * Upgrade to PDFBox 1.8.5 (TIKA-1290, TIKA-1231, TIKA-1233)
+
* Zip Container Detection for DWFX and XPS formats, which are OPC
based (TIKA-1204, TIKA-1221)
Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDF2XHTML.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDF2XHTML.java?rev=1593996&r1=1593995&r2=1593996&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDF2XHTML.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDF2XHTML.java Mon May 12 15:14:09 2014
@@ -440,8 +440,15 @@ class PDF2XHTML extends PDFTextStripper
EmbeddedDocumentExtractor extractor = getEmbeddedDocumentExtractor();
for (Map.Entry<String,COSObjectable> ent : embeddedFileNames.entrySet()) {
PDComplexFileSpecification spec = (PDComplexFileSpecification) ent.getValue();
+ if (spec == null) {
+ //skip silently
+ continue;
+ }
PDEmbeddedFile file = spec.getEmbeddedFile();
-
+ if (file == null) {
+ //skip silently
+ continue;
+ }
Metadata metadata = new Metadata();
// TODO: other metadata?
metadata.set(Metadata.RESOURCE_NAME_KEY, ent.getKey());