You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2018/03/08 19:10:46 UTC
[tika] branch master updated: TIKA-2530 -- temporary workaround --
check for zero length byte array in rtf body to avoid buffer underflow from
POI, via Pascal Essiembre.
This is an automated email from the ASF dual-hosted git repository.
tallison pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/tika.git
The following commit(s) were added to refs/heads/master by this push:
new 8ecfeef TIKA-2530 -- temporary workaround -- check for zero length byte array in rtf body to avoid buffer underflow from POI, via Pascal Essiembre.
8ecfeef is described below
commit 8ecfeeff86e991fa9b6c3fa12b89ec96aef00684
Author: tballison <ta...@mitre.org>
AuthorDate: Thu Mar 8 14:10:36 2018 -0500
TIKA-2530 -- temporary workaround -- check for zero length byte array in
rtf body to avoid buffer underflow from POI, via Pascal Essiembre.
---
.../tika/parser/microsoft/OutlookExtractor.java | 30 +++++++++++++---------
1 file changed, 18 insertions(+), 12 deletions(-)
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OutlookExtractor.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OutlookExtractor.java
index e8ce7f0..a9a6090 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OutlookExtractor.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OutlookExtractor.java
@@ -74,6 +74,7 @@ import org.apache.tika.parser.txt.CharsetMatch;
import org.apache.tika.sax.BodyContentHandler;
import org.apache.tika.sax.EmbeddedContentHandler;
import org.apache.tika.sax.XHTMLContentHandler;
+import org.bouncycastle.cms.Recipient;
import org.xml.sax.SAXException;
/**
@@ -321,19 +322,24 @@ public class OutlookExtractor extends AbstractPOIFSExtractor {
}
if (rtfChunk != null && (extractAllAlternatives || !doneBody)) {
ByteChunk chunk = (ByteChunk) rtfChunk;
- MAPIRtfAttribute rtf = new MAPIRtfAttribute(
- MAPIProperty.RTF_COMPRESSED, Types.BINARY.getId(), chunk.getValue()
- );
- Parser rtfParser =
- EmbeddedDocumentUtil.tryToFindExistingLeafParser(RTFParser.class, parseContext);
- if (rtfParser == null) {
- rtfParser = new RTFParser();
+ //avoid buffer underflow TIKA-2530
+ //TODO -- would be good to find an example triggering file and
+ //figure out if this is a bug in POI or a genuine 0 length chunk
+ if (chunk.getValue() != null && chunk.getValue().length > 0) {
+ MAPIRtfAttribute rtf = new MAPIRtfAttribute(
+ MAPIProperty.RTF_COMPRESSED, Types.BINARY.getId(), chunk.getValue()
+ );
+ Parser rtfParser =
+ EmbeddedDocumentUtil.tryToFindExistingLeafParser(RTFParser.class, parseContext);
+ if (rtfParser == null) {
+ rtfParser = new RTFParser();
+ }
+ rtfParser.parse(
+ new ByteArrayInputStream(rtf.getData()),
+ new EmbeddedContentHandler(new BodyContentHandler(xhtml)),
+ new Metadata(), parseContext);
+ doneBody = true;
}
- rtfParser.parse(
- new ByteArrayInputStream(rtf.getData()),
- new EmbeddedContentHandler(new BodyContentHandler(xhtml)),
- new Metadata(), parseContext);
- doneBody = true;
}
if (textChunk != null && (extractAllAlternatives || !doneBody)) {
xhtml.element("p", ((StringChunk) textChunk).getValue());
--
To stop receiving notification emails like this one, please contact
tallison@apache.org.