You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2020/04/08 15:34:43 UTC

[tika] 06/14: Improve TikaMemoryLimitException msg

This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch branch_1x
in repository https://gitbox.apache.org/repos/asf/tika.git

commit f7f1be6ae18bc5a48701dc47d1378ceb8d7bbc67
Author: tallison <ta...@apache.org>
AuthorDate: Thu Apr 2 13:11:30 2020 -0400

    Improve TikaMemoryLimitException msg
---
 .../org/apache/tika/exception/TikaMemoryLimitException.java  | 10 ++++++++++
 .../org/apache/tika/parser/apple/AppleSingleFileParser.java  |  6 ++++--
 .../main/java/org/apache/tika/parser/image/ICNSParser.java   |  7 ++++++-
 .../org/apache/tika/parser/microsoft/onenote/OneNotePtr.java |  6 ++++--
 .../tika/parser/microsoft/onenote/OneNoteTreeWalker.java     |  3 ++-
 .../java/org/apache/tika/parser/pdf/ImageGraphicsEngine.java |  3 ++-
 .../java/org/apache/tika/parser/rtf/RTFEmbObjHandler.java    |  4 +---
 .../java/org/apache/tika/parser/rtf/RTFObjDataParser.java    | 12 +++++-------
 .../org/apache/tika/server/resource/UnpackerResource.java    |  2 +-
 9 files changed, 35 insertions(+), 18 deletions(-)

diff --git a/tika-core/src/main/java/org/apache/tika/exception/TikaMemoryLimitException.java b/tika-core/src/main/java/org/apache/tika/exception/TikaMemoryLimitException.java
index baf5818..010738e 100644
--- a/tika-core/src/main/java/org/apache/tika/exception/TikaMemoryLimitException.java
+++ b/tika-core/src/main/java/org/apache/tika/exception/TikaMemoryLimitException.java
@@ -27,4 +27,14 @@ public class TikaMemoryLimitException extends TikaException {
     public TikaMemoryLimitException(String msg) {
         super(msg);
     }
+
+    public TikaMemoryLimitException(long triedToAllocate, long maxAllowable) {
+        super(msg(triedToAllocate, maxAllowable));
+    }
+
+    private static String msg(long triedToAllocate, long maxAllowable) {
+        return "Tried to allocate "+triedToAllocate + " bytes, but "+ maxAllowable+
+                " is the maximum allowed. Please open an issue https://issues.apache.org/jira/projects/TIKA" +
+                " if you believe this file is not corrupt.";
+    }
 }
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/apple/AppleSingleFileParser.java b/tika-parsers/src/main/java/org/apache/tika/parser/apple/AppleSingleFileParser.java
index fa41554..f32e501 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/apple/AppleSingleFileParser.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/apple/AppleSingleFileParser.java
@@ -28,6 +28,7 @@ import java.util.Set;
 import org.apache.commons.io.IOUtils;
 import org.apache.commons.io.input.CloseShieldInputStream;
 import org.apache.tika.exception.TikaException;
+import org.apache.tika.exception.TikaMemoryLimitException;
 import org.apache.tika.extractor.EmbeddedDocumentExtractor;
 import org.apache.tika.extractor.EmbeddedDocumentUtil;
 import org.apache.tika.io.EndianUtils;
@@ -48,6 +49,7 @@ import org.xml.sax.SAXException;
  */
 public class AppleSingleFileParser extends AbstractParser {
 
+    private static final int MAX_FIELD_LENGTH = 1_073_741_824;
     /**
      * Entry types
      */
@@ -123,8 +125,8 @@ public class AppleSingleFileParser extends AbstractParser {
             IOUtils.skipFully(stream, diff);
             bytesRead += diff;
             if (f.entryId == REAL_NAME) {
-                if (f.length > Integer.MAX_VALUE) {
-                    throw new TikaException("File name length can't be > integer max");
+                if (f.length > MAX_FIELD_LENGTH) {
+                    throw new TikaMemoryLimitException(f.length, MAX_FIELD_LENGTH);
                 }
                 buffer = new byte[(int)f.length];
                 IOUtils.readFully(stream, buffer);
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/image/ICNSParser.java b/tika-parsers/src/main/java/org/apache/tika/parser/image/ICNSParser.java
index 9a0bd87..f13d46d 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/image/ICNSParser.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/image/ICNSParser.java
@@ -22,6 +22,7 @@ import java.util.Collections;
 import java.util.Set;
 import org.apache.poi.util.IOUtils;
 import org.apache.tika.exception.TikaException;
+import org.apache.tika.exception.TikaMemoryLimitException;
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.mime.MediaType;
 import org.apache.tika.parser.ParseContext;
@@ -36,6 +37,7 @@ import org.xml.sax.SAXException;
  */
 public class ICNSParser extends AbstractParser {
     private static final long serialVersionUID = 922010233654248327L;
+    private static final long MAX_IMAGE_LENGTH_BYTES = 10485760;// 10MB
 
     private static final Set<MediaType> SUPPORTED_TYPES = Collections.singleton(MediaType.image("icns"));
     public static final String ICNS_MIME_TYPE = "image/icns";
@@ -58,6 +60,9 @@ public class ICNSParser extends AbstractParser {
         }
         IOUtils.readFully(stream, header, 0, 4); //Extract image size/length of bytes in file
         int image_length = java.nio.ByteBuffer.wrap(header).getInt();
+        if (image_length > MAX_IMAGE_LENGTH_BYTES) {
+            throw new TikaMemoryLimitException(image_length, MAX_IMAGE_LENGTH_BYTES);
+        }
         byte[] full_file = new byte[image_length];
         IOUtils.readFully(stream, full_file);
         ArrayList<ICNSType> icons = new ArrayList<>();
@@ -74,7 +79,7 @@ public class ICNSParser extends AbstractParser {
             if (icnstype == null) {
                 //exit out of loop
                 //No more icons left
-                offset = image_length - 8;
+                break;
             } else if (icnstype.hasMask() == true) {
                 icon_masks.add(findIconType(tempByteArray));
             } else {
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/OneNotePtr.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/OneNotePtr.java
index 85b20e8..0dc13f3 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/OneNotePtr.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/OneNotePtr.java
@@ -628,7 +628,8 @@ class OneNotePtr {
             }
 
             if (cch > dif.size()) {
-                throw new TikaMemoryLimitException("CCH=" + cch + " was found that was great than file size " + dif.size());
+                throw new TikaMemoryLimitException("CCH=" + cch + " was found that was greater" +
+                        " than file size " + dif.size());
             }
             ByteBuffer dataSpaceBuffer = ByteBuffer.allocate((int) cch * 2);
             dif.read(dataSpaceBuffer);
@@ -1174,7 +1175,8 @@ class OneNotePtr {
 
     public void dumpHex() throws TikaMemoryLimitException, IOException {
         if (end - offset > dif.size()) {
-            throw new TikaMemoryLimitException("Exceeded memory limit when trying to dumpHex - " + (end - offset) + " > " + dif.size());
+            throw new TikaMemoryLimitException("Exceeded memory limit when trying to dumpHex - " +
+                    "" + (end - offset) + " > " + dif.size());
         }
         ByteBuffer byteBuffer = ByteBuffer.allocate((int) (end - offset));
         LOG.debug(Hex.encodeHexString(byteBuffer.array()));
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/OneNoteTreeWalker.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/OneNoteTreeWalker.java
index 14b3745..6d94acf 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/OneNoteTreeWalker.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/OneNoteTreeWalker.java
@@ -302,7 +302,8 @@ class OneNoteTreeWalker {
         OneNotePtr content = new OneNotePtr(oneNoteDocument, dif);
         content.reposition(fileDataStoreObjectReference.ref.fileData);
         if (fileDataStoreObjectReference.ref.fileData.cb > dif.size()) {
-            throw new TikaMemoryLimitException("File data store cb " + fileDataStoreObjectReference.ref.fileData.cb +
+            throw new TikaMemoryLimitException("File data store cb " +
+                    fileDataStoreObjectReference.ref.fileData.cb +
               " exceeds document size: " + dif.size());
         }
         handleEmbedded((int)fileDataStoreObjectReference.ref.fileData.cb);
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/pdf/ImageGraphicsEngine.java b/tika-parsers/src/main/java/org/apache/tika/parser/pdf/ImageGraphicsEngine.java
index a469de8..95af12d 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/pdf/ImageGraphicsEngine.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/pdf/ImageGraphicsEngine.java
@@ -454,7 +454,8 @@ class ImageGraphicsEngine extends PDFGraphicsStreamEngine {
         BoundedInputStream bis = new BoundedInputStream(MAX_IMAGE_LENGTH_BYTES, is);
         IOUtils.copy(bis, os);
         if (bis.hasHitBound()) {
-            throw new TikaMemoryLimitException("Image size is larger than allowed (" + MAX_IMAGE_LENGTH_BYTES + ")");
+            throw new TikaMemoryLimitException(
+                    "Image size is larger than allowed (" + MAX_IMAGE_LENGTH_BYTES + ")");
         }
 
     }
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/rtf/RTFEmbObjHandler.java b/tika-parsers/src/main/java/org/apache/tika/parser/rtf/RTFEmbObjHandler.java
index 26f0084..a0c5074 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/rtf/RTFEmbObjHandler.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/rtf/RTFEmbObjHandler.java
@@ -153,9 +153,7 @@ class RTFEmbObjHandler {
             throw new TikaException("Requesting I read < 0 bytes ?!");
         }
         if (len > memoryLimitInKb*1024) {
-            throw new TikaMemoryLimitException("File embedded in RTF caused this (" + len +
-                    ") bytes), but maximum allowed is ("+(memoryLimitInKb*1024)+")."+
-                    "If this is a valid RTF file, consider increasing the memory limit via TikaConfig.");
+            throw new TikaMemoryLimitException(len, (memoryLimitInKb*1024));
         }
 
         byte[] bytes = new byte[len];
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/rtf/RTFObjDataParser.java b/tika-parsers/src/main/java/org/apache/tika/parser/rtf/RTFObjDataParser.java
index 7d6d24f..45234eb 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/rtf/RTFObjDataParser.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/rtf/RTFObjDataParser.java
@@ -155,7 +155,8 @@ class RTFObjDataParser {
                             new DocumentInputStream((DocumentEntry)ooxml))) {
                     IOUtils.copy(bis, out);
                     if (bis.hasHitBound()) {
-                        throw new TikaMemoryLimitException("Hit memory limit exception. Tried to copy > "+memoryLimitInKb*1024);
+                        throw new TikaMemoryLimitException(
+                                (memoryLimitInKb*1024+1), (memoryLimitInKb*1024));
                     }
                 }
                 ret = out.toByteArray();
@@ -190,7 +191,7 @@ class RTFObjDataParser {
                     BoundedInputStream bis = new BoundedInputStream(memoryLimitInKb*1024, is);
                     IOUtils.copy(is, out);
                     if (bis.hasHitBound()) {
-                        throw new TikaMemoryLimitException("Hit memory limit exception. Tried to copy > "+memoryLimitInKb*1024);
+                        throw new TikaMemoryLimitException(memoryLimitInKb*1024+1, memoryLimitInKb*1024);
                     }
                     ret = out.toByteArray();
                     metadata.set(Metadata.RESOURCE_NAME_KEY, "file_" + unknownFilenameCount.getAndIncrement() + "." + type.getExtension());
@@ -325,12 +326,9 @@ class RTFObjDataParser {
         if (len < 0) {
             throw new IOException("Requested length for reading bytes < 0?!: " + len);
         } else if (memoryLimitInKb > -1 && len > memoryLimitInKb*1024) {
-            throw new TikaMemoryLimitException("File embedded in RTF caused this (" + len +
-                    ") bytes), but maximum allowed is ("+(memoryLimitInKb*1024)+")."+
-                    "If this is a valid RTF file, consider increasing the memory limit via TikaConfig.");
+            throw new TikaMemoryLimitException(len, memoryLimitInKb*1024);
         } else if (len > Integer.MAX_VALUE) {
-            throw new TikaMemoryLimitException("File embedded in RTF caused this (" + len +
-                    ") bytes), but there is a hard limit of Integer.MAX_VALUE+");
+            throw new TikaMemoryLimitException(len, Integer.MAX_VALUE);
         }
 
         return new byte[(int) len];
diff --git a/tika-server/src/main/java/org/apache/tika/server/resource/UnpackerResource.java b/tika-server/src/main/java/org/apache/tika/server/resource/UnpackerResource.java
index 7085345..76acde9 100644
--- a/tika-server/src/main/java/org/apache/tika/server/resource/UnpackerResource.java
+++ b/tika-server/src/main/java/org/apache/tika/server/resource/UnpackerResource.java
@@ -186,7 +186,7 @@ public class UnpackerResource {
             IOUtils.copy(bis, bos);
             if (bis.hasHitBound()) {
                 throw new IOExceptionWithCause(
-                        new TikaMemoryLimitException("Tried to read beyond "+MAX_ATTACHMENT_BYTES + "bytes"));
+                        new TikaMemoryLimitException(MAX_ATTACHMENT_BYTES+1, MAX_ATTACHMENT_BYTES));
             }
             byte[] data = bos.toByteArray();