You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by mi...@apache.org on 2012/05/23 12:24:33 UTC

svn commit: r1341819 - /tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParser.java

Author: mikemccand
Date: Wed May 23 10:24:33 2012
New Revision: 1341819

URL: http://svn.apache.org/viewvc?rev=1341819&view=rev
Log:
PDFBOX-1320: fix NPE when visiting embedded files (embeddedFiles.getNames() can return null, so check before iterating)

Modified:
    tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParser.java

Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParser.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParser.java?rev=1341819&r1=1341818&r2=1341819&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParser.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParser.java Wed May 23 10:24:33 2012
@@ -177,25 +177,29 @@ public class PDFParser extends AbstractP
                     embeddedExtractor = new ParsingEmbeddedDocumentExtractor(context);
                 }
 
-                for (Map.Entry<String,Object> ent : embeddedFiles.getNames().entrySet()) {
-                    PDComplexFileSpecification spec = (PDComplexFileSpecification) ent.getValue();
-                    PDEmbeddedFile file = spec.getEmbeddedFile();
-
-                    Metadata metadata = new Metadata();
-                    // TODO: other metadata?
-                    metadata.set(Metadata.RESOURCE_NAME_KEY, ent.getKey());
-                    metadata.set(Metadata.CONTENT_TYPE, file.getSubtype());
-                    metadata.set(Metadata.CONTENT_LENGTH, Long.toString(file.getSize()));
-
-                    if (embeddedExtractor.shouldParseEmbedded(metadata)) {
-                        TikaInputStream stream = TikaInputStream.get(file.createInputStream());
-                        try {
-                            embeddedExtractor.parseEmbedded(
-                                    stream,
-                                    new EmbeddedContentHandler(handler),
-                                    metadata, false);
-                        } finally {
-                            stream.close();
+                Map<String,Object> embeddedFileNames = embeddedFiles.getNames();
+
+                if (embeddedFileNames != null) {
+                    for (Map.Entry<String,Object> ent : embeddedFileNames.entrySet()) {
+                        PDComplexFileSpecification spec = (PDComplexFileSpecification) ent.getValue();
+                        PDEmbeddedFile file = spec.getEmbeddedFile();
+
+                        Metadata metadata = new Metadata();
+                        // TODO: other metadata?
+                        metadata.set(Metadata.RESOURCE_NAME_KEY, ent.getKey());
+                        metadata.set(Metadata.CONTENT_TYPE, file.getSubtype());
+                        metadata.set(Metadata.CONTENT_LENGTH, Long.toString(file.getSize()));
+
+                        if (embeddedExtractor.shouldParseEmbedded(metadata)) {
+                            TikaInputStream stream = TikaInputStream.get(file.createInputStream());
+                            try {
+                                embeddedExtractor.parseEmbedded(
+                                                                stream,
+                                                                new EmbeddedContentHandler(handler),
+                                                                metadata, false);
+                            } finally {
+                                stream.close();
+                            }
                         }
                     }
                 }