You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2017/11/13 15:29:07 UTC
[tika] branch master updated: TIKA-2488 -- catch potential npe in getting attachment's inputstream
This is an automated email from the ASF dual-hosted git repository.
tallison pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/tika.git
The following commit(s) were added to refs/heads/master by this push:
new 9c2e1b9 TIKA-2488 -- catch potential npe in getting attachment's inputstream
9c2e1b9 is described below
commit 9c2e1b9d839cffbc0820b789ffdd17a9a0b10759
Author: tballison <ta...@mitre.org>
AuthorDate: Mon Nov 13 10:28:57 2017 -0500
TIKA-2488 -- catch potential npe in getting attachment's inputstream
---
CHANGES.txt | 3 ++
.../apache/tika/parser/mbox/OutlookPSTParser.java | 35 ++++++++++++++--------
2 files changed, 26 insertions(+), 12 deletions(-)
diff --git a/CHANGES.txt b/CHANGES.txt
index 7cd9784..8cb683c 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1,5 +1,8 @@
Release 1.17 - ???
+ * Catch potential NPE in getting InputStream for attachments
+ in PST file (TIKA-2488).
+
* Upgrade to PDFBox 2.0.8 (TIKA-2489).
* Allow configuration of markLimit in EncodingDetectors
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/mbox/OutlookPSTParser.java b/tika-parsers/src/main/java/org/apache/tika/parser/mbox/OutlookPSTParser.java
index 7a6ada8..61d7bac 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/mbox/OutlookPSTParser.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/mbox/OutlookPSTParser.java
@@ -125,8 +125,14 @@ public class OutlookPSTParser extends AbstractParser {
handler.startElement("div", attributes);
handler.element("h1", pstMail.getSubject());
- parserMailItem(handler, pstMail, embeddedExtractor);
- parseMailAttachments(handler, pstMail, embeddedExtractor);
+ final Metadata mailMetadata = new Metadata();
+ //parse attachments first so that stream exceptions
+ //in attachments can make it into mailMetadata.
+ //RecursiveParserWrapper copies the metadata and thereby prevents
+ //modifications to mailMetadata from making it into the
+ //metadata objects cached by the RecursiveParserWrapper
+ parseMailAttachments(handler, pstMail, mailMetadata, embeddedExtractor);
+ parserMailItem(handler, pstMail, mailMetadata, embeddedExtractor);
handler.endElement("div");
@@ -144,8 +150,8 @@ public class OutlookPSTParser extends AbstractParser {
}
}
- private void parserMailItem(XHTMLContentHandler handler, PSTMessage pstMail, EmbeddedDocumentExtractor embeddedExtractor) throws SAXException, IOException {
- Metadata mailMetadata = new Metadata();
+ private void parserMailItem(XHTMLContentHandler handler, PSTMessage pstMail, Metadata mailMetadata,
+ EmbeddedDocumentExtractor embeddedExtractor) throws SAXException, IOException {
mailMetadata.set(Metadata.RESOURCE_NAME_KEY, pstMail.getInternetMessageId());
mailMetadata.set(Metadata.EMBEDDED_RELATIONSHIP_ID, pstMail.getInternetMessageId());
mailMetadata.set(TikaCoreProperties.IDENTIFIER, pstMail.getInternetMessageId());
@@ -217,11 +223,12 @@ public class OutlookPSTParser extends AbstractParser {
embeddedExtractor.parseEmbedded(new ByteArrayInputStream(mailContent), handler, mailMetadata, true);
}
- private void parseMailAttachments(XHTMLContentHandler xhtml, PSTMessage email, EmbeddedDocumentExtractor embeddedExtractor)
+ private void parseMailAttachments(XHTMLContentHandler xhtml, PSTMessage email,
+ final Metadata mailMetadata,
+ EmbeddedDocumentExtractor embeddedExtractor)
throws TikaException {
int numberOfAttachments = email.getNumberOfAttachments();
for (int i = 0; i < numberOfAttachments; i++) {
- File tempFile = null;
try {
PSTAttachment attach = email.getAttachment(i);
@@ -241,21 +248,25 @@ public class OutlookPSTParser extends AbstractParser {
attributes.addAttribute("", "id", "id", "CDATA", filename);
xhtml.startElement("div", attributes);
if (embeddedExtractor.shouldParseEmbedded(attachMeta)) {
- TemporaryResources tmp = new TemporaryResources();
+ TikaInputStream tis = null;
+ try {
+ tis = TikaInputStream.get(attach.getFileInputStream());
+ } catch (NullPointerException e) {//TIKA-2488
+ EmbeddedDocumentUtil.recordEmbeddedStreamException(e, mailMetadata);
+ continue;
+ }
+
try {
- TikaInputStream tis = TikaInputStream.get(attach.getFileInputStream(), tmp);
embeddedExtractor.parseEmbedded(tis, xhtml, attachMeta, true);
} finally {
- tmp.dispose();
+
+ tis.close();
}
}
xhtml.endElement("div");
} catch (Exception e) {
throw new TikaException("Unable to unpack document stream", e);
- } finally {
- if (tempFile != null)
- tempFile.delete();
}
}
}
--
To stop receiving notification emails like this one, please contact
commits@tika.apache.org.