You are viewing a plain text version of this content. The canonical link for it was not preserved in this plain-text copy.
Posted to commits@tika.apache.org by ta...@apache.org on 2020/04/08 15:34:43 UTC
[tika] 06/14: Improve TikaMemoryLimitException msg
This is an automated email from the ASF dual-hosted git repository.
tallison pushed a commit to branch branch_1x
in repository https://gitbox.apache.org/repos/asf/tika.git
commit f7f1be6ae18bc5a48701dc47d1378ceb8d7bbc67
Author: tallison <ta...@apache.org>
AuthorDate: Thu Apr 2 13:11:30 2020 -0400
Improve TikaMemoryLimitException msg
---
.../org/apache/tika/exception/TikaMemoryLimitException.java | 10 ++++++++++
.../org/apache/tika/parser/apple/AppleSingleFileParser.java | 6 ++++--
.../main/java/org/apache/tika/parser/image/ICNSParser.java | 7 ++++++-
.../org/apache/tika/parser/microsoft/onenote/OneNotePtr.java | 6 ++++--
.../tika/parser/microsoft/onenote/OneNoteTreeWalker.java | 3 ++-
.../java/org/apache/tika/parser/pdf/ImageGraphicsEngine.java | 3 ++-
.../java/org/apache/tika/parser/rtf/RTFEmbObjHandler.java | 4 +---
.../java/org/apache/tika/parser/rtf/RTFObjDataParser.java | 12 +++++-------
.../org/apache/tika/server/resource/UnpackerResource.java | 2 +-
9 files changed, 35 insertions(+), 18 deletions(-)
diff --git a/tika-core/src/main/java/org/apache/tika/exception/TikaMemoryLimitException.java b/tika-core/src/main/java/org/apache/tika/exception/TikaMemoryLimitException.java
index baf5818..010738e 100644
--- a/tika-core/src/main/java/org/apache/tika/exception/TikaMemoryLimitException.java
+++ b/tika-core/src/main/java/org/apache/tika/exception/TikaMemoryLimitException.java
@@ -27,4 +27,14 @@ public class TikaMemoryLimitException extends TikaException {
public TikaMemoryLimitException(String msg) {
super(msg);
}
+
+ public TikaMemoryLimitException(long triedToAllocate, long maxAllowable) {
+ super(msg(triedToAllocate, maxAllowable));
+ }
+
+ private static String msg(long triedToAllocate, long maxAllowable) {
+ return "Tried to allocate "+triedToAllocate + " bytes, but "+ maxAllowable+
+ " is the maximum allowed. Please open an issue https://issues.apache.org/jira/projects/TIKA" +
+ " if you believe this file is not corrupt.";
+ }
}
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/apple/AppleSingleFileParser.java b/tika-parsers/src/main/java/org/apache/tika/parser/apple/AppleSingleFileParser.java
index fa41554..f32e501 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/apple/AppleSingleFileParser.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/apple/AppleSingleFileParser.java
@@ -28,6 +28,7 @@ import java.util.Set;
import org.apache.commons.io.IOUtils;
import org.apache.commons.io.input.CloseShieldInputStream;
import org.apache.tika.exception.TikaException;
+import org.apache.tika.exception.TikaMemoryLimitException;
import org.apache.tika.extractor.EmbeddedDocumentExtractor;
import org.apache.tika.extractor.EmbeddedDocumentUtil;
import org.apache.tika.io.EndianUtils;
@@ -48,6 +49,7 @@ import org.xml.sax.SAXException;
*/
public class AppleSingleFileParser extends AbstractParser {
+ private static final int MAX_FIELD_LENGTH = 1_073_741_824;
/**
* Entry types
*/
@@ -123,8 +125,8 @@ public class AppleSingleFileParser extends AbstractParser {
IOUtils.skipFully(stream, diff);
bytesRead += diff;
if (f.entryId == REAL_NAME) {
- if (f.length > Integer.MAX_VALUE) {
- throw new TikaException("File name length can't be > integer max");
+ if (f.length > MAX_FIELD_LENGTH) {
+ throw new TikaMemoryLimitException(f.length, MAX_FIELD_LENGTH);
}
buffer = new byte[(int)f.length];
IOUtils.readFully(stream, buffer);
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/image/ICNSParser.java b/tika-parsers/src/main/java/org/apache/tika/parser/image/ICNSParser.java
index 9a0bd87..f13d46d 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/image/ICNSParser.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/image/ICNSParser.java
@@ -22,6 +22,7 @@ import java.util.Collections;
import java.util.Set;
import org.apache.poi.util.IOUtils;
import org.apache.tika.exception.TikaException;
+import org.apache.tika.exception.TikaMemoryLimitException;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.ParseContext;
@@ -36,6 +37,7 @@ import org.xml.sax.SAXException;
*/
public class ICNSParser extends AbstractParser {
private static final long serialVersionUID = 922010233654248327L;
+ private static final long MAX_IMAGE_LENGTH_BYTES = 10485760;// 10MB
private static final Set<MediaType> SUPPORTED_TYPES = Collections.singleton(MediaType.image("icns"));
public static final String ICNS_MIME_TYPE = "image/icns";
@@ -58,6 +60,9 @@ public class ICNSParser extends AbstractParser {
}
IOUtils.readFully(stream, header, 0, 4); //Extract image size/length of bytes in file
int image_length = java.nio.ByteBuffer.wrap(header).getInt();
+ if (image_length > MAX_IMAGE_LENGTH_BYTES) {
+ throw new TikaMemoryLimitException(image_length, MAX_IMAGE_LENGTH_BYTES);
+ }
byte[] full_file = new byte[image_length];
IOUtils.readFully(stream, full_file);
ArrayList<ICNSType> icons = new ArrayList<>();
@@ -74,7 +79,7 @@ public class ICNSParser extends AbstractParser {
if (icnstype == null) {
//exit out of loop
//No more icons left
- offset = image_length - 8;
+ break;
} else if (icnstype.hasMask() == true) {
icon_masks.add(findIconType(tempByteArray));
} else {
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/OneNotePtr.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/OneNotePtr.java
index 85b20e8..0dc13f3 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/OneNotePtr.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/OneNotePtr.java
@@ -628,7 +628,8 @@ class OneNotePtr {
}
if (cch > dif.size()) {
- throw new TikaMemoryLimitException("CCH=" + cch + " was found that was great than file size " + dif.size());
+ throw new TikaMemoryLimitException("CCH=" + cch + " was found that was greater" +
+ " than file size " + dif.size());
}
ByteBuffer dataSpaceBuffer = ByteBuffer.allocate((int) cch * 2);
dif.read(dataSpaceBuffer);
@@ -1174,7 +1175,8 @@ class OneNotePtr {
public void dumpHex() throws TikaMemoryLimitException, IOException {
if (end - offset > dif.size()) {
- throw new TikaMemoryLimitException("Exceeded memory limit when trying to dumpHex - " + (end - offset) + " > " + dif.size());
+ throw new TikaMemoryLimitException("Exceeded memory limit when trying to dumpHex - " +
+ "" + (end - offset) + " > " + dif.size());
}
ByteBuffer byteBuffer = ByteBuffer.allocate((int) (end - offset));
LOG.debug(Hex.encodeHexString(byteBuffer.array()));
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/OneNoteTreeWalker.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/OneNoteTreeWalker.java
index 14b3745..6d94acf 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/OneNoteTreeWalker.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/OneNoteTreeWalker.java
@@ -302,7 +302,8 @@ class OneNoteTreeWalker {
OneNotePtr content = new OneNotePtr(oneNoteDocument, dif);
content.reposition(fileDataStoreObjectReference.ref.fileData);
if (fileDataStoreObjectReference.ref.fileData.cb > dif.size()) {
- throw new TikaMemoryLimitException("File data store cb " + fileDataStoreObjectReference.ref.fileData.cb +
+ throw new TikaMemoryLimitException("File data store cb " +
+ fileDataStoreObjectReference.ref.fileData.cb +
" exceeds document size: " + dif.size());
}
handleEmbedded((int)fileDataStoreObjectReference.ref.fileData.cb);
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/pdf/ImageGraphicsEngine.java b/tika-parsers/src/main/java/org/apache/tika/parser/pdf/ImageGraphicsEngine.java
index a469de8..95af12d 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/pdf/ImageGraphicsEngine.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/pdf/ImageGraphicsEngine.java
@@ -454,7 +454,8 @@ class ImageGraphicsEngine extends PDFGraphicsStreamEngine {
BoundedInputStream bis = new BoundedInputStream(MAX_IMAGE_LENGTH_BYTES, is);
IOUtils.copy(bis, os);
if (bis.hasHitBound()) {
- throw new TikaMemoryLimitException("Image size is larger than allowed (" + MAX_IMAGE_LENGTH_BYTES + ")");
+ throw new TikaMemoryLimitException(
+ "Image size is larger than allowed (" + MAX_IMAGE_LENGTH_BYTES + ")");
}
}
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/rtf/RTFEmbObjHandler.java b/tika-parsers/src/main/java/org/apache/tika/parser/rtf/RTFEmbObjHandler.java
index 26f0084..a0c5074 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/rtf/RTFEmbObjHandler.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/rtf/RTFEmbObjHandler.java
@@ -153,9 +153,7 @@ class RTFEmbObjHandler {
throw new TikaException("Requesting I read < 0 bytes ?!");
}
if (len > memoryLimitInKb*1024) {
- throw new TikaMemoryLimitException("File embedded in RTF caused this (" + len +
- ") bytes), but maximum allowed is ("+(memoryLimitInKb*1024)+")."+
- "If this is a valid RTF file, consider increasing the memory limit via TikaConfig.");
+ throw new TikaMemoryLimitException(len, (memoryLimitInKb*1024));
}
byte[] bytes = new byte[len];
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/rtf/RTFObjDataParser.java b/tika-parsers/src/main/java/org/apache/tika/parser/rtf/RTFObjDataParser.java
index 7d6d24f..45234eb 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/rtf/RTFObjDataParser.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/rtf/RTFObjDataParser.java
@@ -155,7 +155,8 @@ class RTFObjDataParser {
new DocumentInputStream((DocumentEntry)ooxml))) {
IOUtils.copy(bis, out);
if (bis.hasHitBound()) {
- throw new TikaMemoryLimitException("Hit memory limit exception. Tried to copy > "+memoryLimitInKb*1024);
+ throw new TikaMemoryLimitException(
+ (memoryLimitInKb*1024+1), (memoryLimitInKb*1024));
}
}
ret = out.toByteArray();
@@ -190,7 +191,7 @@ class RTFObjDataParser {
BoundedInputStream bis = new BoundedInputStream(memoryLimitInKb*1024, is);
IOUtils.copy(is, out);
if (bis.hasHitBound()) {
- throw new TikaMemoryLimitException("Hit memory limit exception. Tried to copy > "+memoryLimitInKb*1024);
+ throw new TikaMemoryLimitException(memoryLimitInKb*1024+1, memoryLimitInKb*1024);
}
ret = out.toByteArray();
metadata.set(Metadata.RESOURCE_NAME_KEY, "file_" + unknownFilenameCount.getAndIncrement() + "." + type.getExtension());
@@ -325,12 +326,9 @@ class RTFObjDataParser {
if (len < 0) {
throw new IOException("Requested length for reading bytes < 0?!: " + len);
} else if (memoryLimitInKb > -1 && len > memoryLimitInKb*1024) {
- throw new TikaMemoryLimitException("File embedded in RTF caused this (" + len +
- ") bytes), but maximum allowed is ("+(memoryLimitInKb*1024)+")."+
- "If this is a valid RTF file, consider increasing the memory limit via TikaConfig.");
+ throw new TikaMemoryLimitException(len, memoryLimitInKb*1024);
} else if (len > Integer.MAX_VALUE) {
- throw new TikaMemoryLimitException("File embedded in RTF caused this (" + len +
- ") bytes), but there is a hard limit of Integer.MAX_VALUE+");
+ throw new TikaMemoryLimitException(len, Integer.MAX_VALUE);
}
return new byte[(int) len];
diff --git a/tika-server/src/main/java/org/apache/tika/server/resource/UnpackerResource.java b/tika-server/src/main/java/org/apache/tika/server/resource/UnpackerResource.java
index 7085345..76acde9 100644
--- a/tika-server/src/main/java/org/apache/tika/server/resource/UnpackerResource.java
+++ b/tika-server/src/main/java/org/apache/tika/server/resource/UnpackerResource.java
@@ -186,7 +186,7 @@ public class UnpackerResource {
IOUtils.copy(bis, bos);
if (bis.hasHitBound()) {
throw new IOExceptionWithCause(
- new TikaMemoryLimitException("Tried to read beyond "+MAX_ATTACHMENT_BYTES + "bytes"));
+ new TikaMemoryLimitException(MAX_ATTACHMENT_BYTES+1, MAX_ATTACHMENT_BYTES));
}
byte[] data = bos.toByteArray();