You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2021/04/28 16:28:29 UTC

[tika] branch main updated: TIKA-2787 -- make WriteLimitReachedException public for Tika 2.x

This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git


The following commit(s) were added to refs/heads/main by this push:
     new fa08843  TIKA-2787 -- make WriteLimitReachedException public for Tika 2.x
fa08843 is described below

commit fa08843cec46e8564a060d2c2ba2a05ad2f8df1e
Author: tballison <ta...@apache.org>
AuthorDate: Wed Apr 28 12:17:33 2021 -0400

    TIKA-2787 -- make WriteLimitReachedException public for Tika 2.x
---
 tika-core/src/main/java/org/apache/tika/Tika.java  |  3 +-
 ...eached.java => WriteLimitReachedException.java} | 23 ++++++++-
 .../apache/tika/parser/RecursiveParserWrapper.java | 19 +++++--
 .../apache/tika/sax/WriteOutContentHandler.java    | 60 ++++------------------
 .../tika/sax/BasicContentHandlerFactoryTest.java   |  6 +--
 .../tika/server/classic/TikaResourceTest.java      |  3 +-
 .../apache/tika/server/core/TikaResourceTest.java  |  2 +-
 7 files changed, 54 insertions(+), 62 deletions(-)

diff --git a/tika-core/src/main/java/org/apache/tika/Tika.java b/tika-core/src/main/java/org/apache/tika/Tika.java
index 6c898a1..601703e 100644
--- a/tika-core/src/main/java/org/apache/tika/Tika.java
+++ b/tika-core/src/main/java/org/apache/tika/Tika.java
@@ -30,6 +30,7 @@ import org.xml.sax.SAXException;
 import org.apache.tika.config.TikaConfig;
 import org.apache.tika.detect.Detector;
 import org.apache.tika.exception.TikaException;
+import org.apache.tika.exception.WriteLimitReachedException;
 import org.apache.tika.io.TikaInputStream;
 import org.apache.tika.language.translate.Translator;
 import org.apache.tika.metadata.Metadata;
@@ -523,7 +524,7 @@ public class Tika {
             context.set(Parser.class, parser);
             parser.parse(stream, new BodyContentHandler(handler), metadata, context);
         } catch (SAXException e) {
-            if (!handler.isWriteLimitReached(e)) {
+            if (!WriteLimitReachedException.isWriteLimitReached(e)) {
                 // This should never happen with BodyContentHandler...
                 throw new TikaException("Unexpected SAX processing failure", e);
             }
diff --git a/tika-core/src/main/java/org/apache/tika/exception/WriteLimitReached.java b/tika-core/src/main/java/org/apache/tika/exception/WriteLimitReachedException.java
similarity index 54%
rename from tika-core/src/main/java/org/apache/tika/exception/WriteLimitReached.java
rename to tika-core/src/main/java/org/apache/tika/exception/WriteLimitReachedException.java
index e861282..15225b4 100644
--- a/tika-core/src/main/java/org/apache/tika/exception/WriteLimitReached.java
+++ b/tika-core/src/main/java/org/apache/tika/exception/WriteLimitReachedException.java
@@ -18,5 +18,26 @@ package org.apache.tika.exception;
 
 import org.xml.sax.SAXException;
 
-public class WriteLimitReached extends SAXException {
+public class WriteLimitReachedException extends SAXException {
+
+    public WriteLimitReachedException(String msg) {
+        super(msg);
+    }
+
+    /**
+     * Checks whether the given exception (or any of its root causes) is a
+     * {@link WriteLimitReachedException}, the signal of reaching the write limit.
+     *
+     * @param t throwable
+     * @return <code>true</code> if the write limit was reached,
+     * <code>false</code> otherwise
+     * @since Apache Tika 2.0
+     */
+    public static boolean isWriteLimitReached(Throwable t) {
+        if (t instanceof WriteLimitReachedException) {
+            return true;
+        } else {
+            return t.getCause() != null && isWriteLimitReached(t.getCause());
+        }
+    }
 }
diff --git a/tika-core/src/main/java/org/apache/tika/parser/RecursiveParserWrapper.java b/tika-core/src/main/java/org/apache/tika/parser/RecursiveParserWrapper.java
index 13e42b1..09309d1 100644
--- a/tika-core/src/main/java/org/apache/tika/parser/RecursiveParserWrapper.java
+++ b/tika-core/src/main/java/org/apache/tika/parser/RecursiveParserWrapper.java
@@ -26,7 +26,7 @@ import org.xml.sax.SAXException;
 
 import org.apache.tika.exception.CorruptedFileException;
 import org.apache.tika.exception.TikaException;
-import org.apache.tika.exception.WriteLimitReached;
+import org.apache.tika.exception.WriteLimitReachedException;
 import org.apache.tika.exception.ZeroByteFileException;
 import org.apache.tika.io.FilenameUtils;
 import org.apache.tika.io.TemporaryResources;
@@ -185,7 +185,7 @@ public class RecursiveParserWrapper extends ParserDecorator {
      * @return
      */
     private boolean isWriteLimitReached(Throwable t) {
-        if (t instanceof WriteLimitReached) {
+        if (t instanceof WriteLimitReachedException) {
             return true;
         } else if (t.getMessage() != null &&
                 t.getMessage().indexOf("Your document contained more than") == 0) {
@@ -358,7 +358,13 @@ public class RecursiveParserWrapper extends ParserDecorator {
             int availableLength = Math.min(totalWriteLimit - totalChars, length);
             super.characters(ch, start, availableLength);
             if (availableLength < length) {
-                throw new WriteLimitReached();
+                throw new WriteLimitReachedException(
+                        "Your document contained more than " + totalWriteLimit +
+                                " characters, and so your requested limit has been" +
+                                " reached. To receive the full text of the document," +
+                                " increase your limit. (Text up to the limit is" +
+                                " however available)."
+                );
             }
         }
 
@@ -371,7 +377,12 @@ public class RecursiveParserWrapper extends ParserDecorator {
             int availableLength = Math.min(totalWriteLimit - totalChars, length);
             super.ignorableWhitespace(ch, start, availableLength);
             if (availableLength < length) {
-                throw new WriteLimitReached();
+                throw new WriteLimitReachedException("Your document contained more than "
+                        + totalWriteLimit +
+                        " characters, and so your requested limit has been" +
+                        " reached. To receive the full text of the document," +
+                        " increase your limit. (Text up to the limit is" + " however available)."
+                );
             }
         }
     }
diff --git a/tika-core/src/main/java/org/apache/tika/sax/WriteOutContentHandler.java b/tika-core/src/main/java/org/apache/tika/sax/WriteOutContentHandler.java
index 6006a70..920afaf 100644
--- a/tika-core/src/main/java/org/apache/tika/sax/WriteOutContentHandler.java
+++ b/tika-core/src/main/java/org/apache/tika/sax/WriteOutContentHandler.java
@@ -18,25 +18,21 @@ package org.apache.tika.sax;
 
 import java.io.OutputStream;
 import java.io.OutputStreamWriter;
-import java.io.Serializable;
 import java.io.StringWriter;
 import java.io.Writer;
 import java.nio.charset.Charset;
-import java.util.UUID;
 
 import org.xml.sax.ContentHandler;
 import org.xml.sax.SAXException;
 
+import org.apache.tika.exception.WriteLimitReachedException;
+
 /**
  * SAX event handler that writes content up to an optional write
  * limit out to a character stream or other decorated handler.
  */
 public class WriteOutContentHandler extends ContentHandlerDecorator {
 
-    /**
-     * The unique tag associated with exceptions from stream.
-     */
-    private final Serializable tag = UUID.randomUUID();
 
     /**
      * The maximum number of characters to write to the character stream.
@@ -101,7 +97,7 @@ public class WriteOutContentHandler extends ContentHandlerDecorator {
      * <p>
      * The internal string buffer is bounded at the given number of characters.
      * If this write limit is reached, then a {@link SAXException} is thrown.
-     * The {@link #isWriteLimitReached(Throwable)} method can be used to
+     * The {@link WriteLimitReachedException#isWriteLimitReached(Throwable)} method can be used to
      * detect this case.
      *
      * @param writeLimit maximum number of characters to include in the string,
@@ -119,7 +115,8 @@ public class WriteOutContentHandler extends ContentHandlerDecorator {
      * <p>
      * The internal string buffer is bounded at 100k characters. If this
      * write limit is reached, then a {@link SAXException} is thrown. The
-     * {@link #isWriteLimitReached(Throwable)} method can be used to detect
+     * {@link WriteLimitReachedException#isWriteLimitReached(Throwable)} method can be used to
+     * detect
      * this case.
      */
     public WriteOutContentHandler() {
@@ -140,7 +137,7 @@ public class WriteOutContentHandler extends ContentHandlerDecorator {
             throw new WriteLimitReachedException("Your document contained more than " + writeLimit +
                     " characters, and so your requested limit has been" +
                     " reached. To receive the full text of the document," +
-                    " increase your limit. (Text up to the limit is" + " however available).", tag);
+                    " increase your limit. (Text up to the limit is" + " however available).");
         }
     }
 
@@ -152,50 +149,11 @@ public class WriteOutContentHandler extends ContentHandlerDecorator {
         } else {
             super.ignorableWhitespace(ch, start, writeLimit - writeCount);
             writeCount = writeLimit;
-            throw new WriteLimitReachedException("Your document contained more than " + writeLimit +
+            throw new WriteLimitReachedException("Your document contained more than "
+                    + writeLimit +
                     " characters, and so your requested limit has been" +
                     " reached. To receive the full text of the document," +
-                    " increase your limit. (Text up to the limit is" + " however available).", tag);
-        }
-    }
-
-    /**
-     * Checks whether the given exception (or any of it's root causes) was
-     * thrown by this handler as a signal of reaching the write limit.
-     *
-     * @param t throwable
-     * @return <code>true</code> if the write limit was reached,
-     * <code>false</code> otherwise
-     * @since Apache Tika 0.7
-     */
-    public boolean isWriteLimitReached(Throwable t) {
-        if (t instanceof WriteLimitReachedException) {
-            return tag.equals(((WriteLimitReachedException) t).tag);
-        } else {
-            return t.getCause() != null && isWriteLimitReached(t.getCause());
+                    " increase your limit. (Text up to the limit is however available).");
         }
     }
-
-    /**
-     * The exception used as a signal when the write limit has been reached.
-     */
-    private static class WriteLimitReachedException extends SAXException {
-
-        /**
-         * Serial version UID
-         */
-        private static final long serialVersionUID = -1850581945459429943L;
-
-        /**
-         * Serializable tag of the handler that caused this exception
-         */
-        private final Serializable tag;
-
-        public WriteLimitReachedException(String message, Serializable tag) {
-            super(message);
-            this.tag = tag;
-        }
-
-    }
-
 }
diff --git a/tika-core/src/test/java/org/apache/tika/sax/BasicContentHandlerFactoryTest.java b/tika-core/src/test/java/org/apache/tika/sax/BasicContentHandlerFactoryTest.java
index 69c4ebb..c5b745c 100644
--- a/tika-core/src/test/java/org/apache/tika/sax/BasicContentHandlerFactoryTest.java
+++ b/tika-core/src/test/java/org/apache/tika/sax/BasicContentHandlerFactoryTest.java
@@ -35,6 +35,7 @@ import org.xml.sax.helpers.AttributesImpl;
 import org.xml.sax.helpers.DefaultHandler;
 
 import org.apache.tika.exception.TikaException;
+import org.apache.tika.exception.WriteLimitReachedException;
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.mime.MediaType;
 import org.apache.tika.parser.ParseContext;
@@ -269,7 +270,7 @@ public class BasicContentHandlerFactoryTest {
         try {
             p.parse(null, handler, null, null);
         } catch (SAXException e) {
-            if (!handler.isWriteLimitReached(e)) {
+            if (!WriteLimitReachedException.isWriteLimitReached(e)) {
                 throw e;
             }
             wlr = true;
@@ -283,8 +284,7 @@ public class BasicContentHandlerFactoryTest {
         try {
             p.parse(null, handler, null, null);
         } catch (SAXException e) {
-            if (!e.getClass().toString().contains(
-                    "org.apache.tika.sax.WriteOutContentHandler$WriteLimitReachedException")) {
+            if (! WriteLimitReachedException.isWriteLimitReached(e)) {
                 throw e;
             }
 
diff --git a/tika-server/tika-server-classic/src/test/java/org/apache/tika/server/classic/TikaResourceTest.java b/tika-server/tika-server-classic/src/test/java/org/apache/tika/server/classic/TikaResourceTest.java
index e0855cb..a8bb8d3 100644
--- a/tika-server/tika-server-classic/src/test/java/org/apache/tika/server/classic/TikaResourceTest.java
+++ b/tika-server/tika-server-classic/src/test/java/org/apache/tika/server/classic/TikaResourceTest.java
@@ -597,7 +597,8 @@ public class TikaResourceTest extends CXFTestBase {
         assertNotFound("declare the causes", metadata.get(TikaCoreProperties.TIKA_CONTENT));
         assertEquals("Microsoft Office Word", metadata.get(OfficeOpenXMLExtended.APPLICATION));
         assertTrue(metadata.get(TikaCoreProperties.CONTAINER_EXCEPTION).startsWith(
-                "org.apache.tika.sax.WriteOutContentHandler$WriteLimitReachedException"));
+                "org.apache.tika.exception.WriteLimitReachedException"
+        ));
         assertNotFound("embed4.txt", metadata.get(TikaCoreProperties.TIKA_CONTENT));
 
     }
diff --git a/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/TikaResourceTest.java b/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/TikaResourceTest.java
index b058db2..fc392cb 100644
--- a/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/TikaResourceTest.java
+++ b/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/TikaResourceTest.java
@@ -169,7 +169,7 @@ public class TikaResourceTest extends CXFTestBase {
         assertContains("Hello world", metadata.get(TikaCoreProperties.TIKA_CONTENT));
         assertNotFound("dissolve", metadata.get(TikaCoreProperties.TIKA_CONTENT));
         assertTrue(metadata.get(TikaCoreProperties.CONTAINER_EXCEPTION).startsWith(
-                "org.apache.tika.sax.WriteOutContentHandler$WriteLimitReachedException"
+                "org.apache.tika.exception.WriteLimitReachedException"
         ));
     }