You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2021/04/28 16:28:29 UTC
[tika] branch main updated: TIKA-2787 -- make WriteLimitReachedException public for Tika 2.x
This is an automated email from the ASF dual-hosted git repository.
tallison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git
The following commit(s) were added to refs/heads/main by this push:
new fa08843 TIKA-2787 -- make WriteLimitReachedException public for Tika 2.x
fa08843 is described below
commit fa08843cec46e8564a060d2c2ba2a05ad2f8df1e
Author: tballison <ta...@apache.org>
AuthorDate: Wed Apr 28 12:17:33 2021 -0400
TIKA-2787 -- make WriteLimitReachedException public for Tika 2.x
---
tika-core/src/main/java/org/apache/tika/Tika.java | 3 +-
...eached.java => WriteLimitReachedException.java} | 23 ++++++++-
.../apache/tika/parser/RecursiveParserWrapper.java | 19 +++++--
.../apache/tika/sax/WriteOutContentHandler.java | 60 ++++------------------
.../tika/sax/BasicContentHandlerFactoryTest.java | 6 +--
.../tika/server/classic/TikaResourceTest.java | 3 +-
.../apache/tika/server/core/TikaResourceTest.java | 2 +-
7 files changed, 54 insertions(+), 62 deletions(-)
diff --git a/tika-core/src/main/java/org/apache/tika/Tika.java b/tika-core/src/main/java/org/apache/tika/Tika.java
index 6c898a1..601703e 100644
--- a/tika-core/src/main/java/org/apache/tika/Tika.java
+++ b/tika-core/src/main/java/org/apache/tika/Tika.java
@@ -30,6 +30,7 @@ import org.xml.sax.SAXException;
import org.apache.tika.config.TikaConfig;
import org.apache.tika.detect.Detector;
import org.apache.tika.exception.TikaException;
+import org.apache.tika.exception.WriteLimitReachedException;
import org.apache.tika.io.TikaInputStream;
import org.apache.tika.language.translate.Translator;
import org.apache.tika.metadata.Metadata;
@@ -523,7 +524,7 @@ public class Tika {
context.set(Parser.class, parser);
parser.parse(stream, new BodyContentHandler(handler), metadata, context);
} catch (SAXException e) {
- if (!handler.isWriteLimitReached(e)) {
+ if (!WriteLimitReachedException.isWriteLimitReached(e)) {
// This should never happen with BodyContentHandler...
throw new TikaException("Unexpected SAX processing failure", e);
}
diff --git a/tika-core/src/main/java/org/apache/tika/exception/WriteLimitReached.java b/tika-core/src/main/java/org/apache/tika/exception/WriteLimitReachedException.java
similarity index 54%
rename from tika-core/src/main/java/org/apache/tika/exception/WriteLimitReached.java
rename to tika-core/src/main/java/org/apache/tika/exception/WriteLimitReachedException.java
index e861282..15225b4 100644
--- a/tika-core/src/main/java/org/apache/tika/exception/WriteLimitReached.java
+++ b/tika-core/src/main/java/org/apache/tika/exception/WriteLimitReachedException.java
@@ -18,5 +18,26 @@ package org.apache.tika.exception;
import org.xml.sax.SAXException;
-public class WriteLimitReached extends SAXException {
+public class WriteLimitReachedException extends SAXException {
+
+ public WriteLimitReachedException(String msg) {
+ super(msg);
+ }
+
+ /**
+ * Checks whether the given exception (or any of its root causes) is a
+ * {@link WriteLimitReachedException}, i.e. a signal of reaching the write limit.
+ *
+ * @param t throwable
+ * @return <code>true</code> if the write limit was reached,
+ * <code>false</code> otherwise
+ * @since Apache Tika 2.0
+ */
+ public static boolean isWriteLimitReached(Throwable t) {
+ if (t instanceof WriteLimitReachedException) {
+ return true;
+ } else {
+ return t.getCause() != null && isWriteLimitReached(t.getCause());
+ }
+ }
}
diff --git a/tika-core/src/main/java/org/apache/tika/parser/RecursiveParserWrapper.java b/tika-core/src/main/java/org/apache/tika/parser/RecursiveParserWrapper.java
index 13e42b1..09309d1 100644
--- a/tika-core/src/main/java/org/apache/tika/parser/RecursiveParserWrapper.java
+++ b/tika-core/src/main/java/org/apache/tika/parser/RecursiveParserWrapper.java
@@ -26,7 +26,7 @@ import org.xml.sax.SAXException;
import org.apache.tika.exception.CorruptedFileException;
import org.apache.tika.exception.TikaException;
-import org.apache.tika.exception.WriteLimitReached;
+import org.apache.tika.exception.WriteLimitReachedException;
import org.apache.tika.exception.ZeroByteFileException;
import org.apache.tika.io.FilenameUtils;
import org.apache.tika.io.TemporaryResources;
@@ -185,7 +185,7 @@ public class RecursiveParserWrapper extends ParserDecorator {
* @return
*/
private boolean isWriteLimitReached(Throwable t) {
- if (t instanceof WriteLimitReached) {
+ if (t instanceof WriteLimitReachedException) {
return true;
} else if (t.getMessage() != null &&
t.getMessage().indexOf("Your document contained more than") == 0) {
@@ -358,7 +358,13 @@ public class RecursiveParserWrapper extends ParserDecorator {
int availableLength = Math.min(totalWriteLimit - totalChars, length);
super.characters(ch, start, availableLength);
if (availableLength < length) {
- throw new WriteLimitReached();
+ throw new WriteLimitReachedException(
+ "Your document contained more than " + totalWriteLimit +
+ " characters, and so your requested limit has been" +
+ " reached. To receive the full text of the document," +
+ " increase your limit. (Text up to the limit is" +
+ " however available)."
+ );
}
}
@@ -371,7 +377,12 @@ public class RecursiveParserWrapper extends ParserDecorator {
int availableLength = Math.min(totalWriteLimit - totalChars, length);
super.ignorableWhitespace(ch, start, availableLength);
if (availableLength < length) {
- throw new WriteLimitReached();
+ throw new WriteLimitReachedException("Your document contained more than "
+ + totalWriteLimit +
+ " characters, and so your requested limit has been" +
+ " reached. To receive the full text of the document," +
+ " increase your limit. (Text up to the limit is" + " however available)."
+ );
}
}
}
diff --git a/tika-core/src/main/java/org/apache/tika/sax/WriteOutContentHandler.java b/tika-core/src/main/java/org/apache/tika/sax/WriteOutContentHandler.java
index 6006a70..920afaf 100644
--- a/tika-core/src/main/java/org/apache/tika/sax/WriteOutContentHandler.java
+++ b/tika-core/src/main/java/org/apache/tika/sax/WriteOutContentHandler.java
@@ -18,25 +18,21 @@ package org.apache.tika.sax;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
-import java.io.Serializable;
import java.io.StringWriter;
import java.io.Writer;
import java.nio.charset.Charset;
-import java.util.UUID;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
+import org.apache.tika.exception.WriteLimitReachedException;
+
/**
* SAX event handler that writes content up to an optional write
* limit out to a character stream or other decorated handler.
*/
public class WriteOutContentHandler extends ContentHandlerDecorator {
- /**
- * The unique tag associated with exceptions from stream.
- */
- private final Serializable tag = UUID.randomUUID();
/**
* The maximum number of characters to write to the character stream.
@@ -101,7 +97,7 @@ public class WriteOutContentHandler extends ContentHandlerDecorator {
* <p>
* The internal string buffer is bounded at the given number of characters.
* If this write limit is reached, then a {@link SAXException} is thrown.
- * The {@link #isWriteLimitReached(Throwable)} method can be used to
+ * The {@link WriteLimitReachedException#isWriteLimitReached(Throwable)} method can be used to
* detect this case.
*
* @param writeLimit maximum number of characters to include in the string,
@@ -119,7 +115,8 @@ public class WriteOutContentHandler extends ContentHandlerDecorator {
* <p>
* The internal string buffer is bounded at 100k characters. If this
* write limit is reached, then a {@link SAXException} is thrown. The
- * {@link #isWriteLimitReached(Throwable)} method can be used to detect
+ * {@link WriteLimitReachedException#isWriteLimitReached(Throwable)} method can be used to
+ * detect
* this case.
*/
public WriteOutContentHandler() {
@@ -140,7 +137,7 @@ public class WriteOutContentHandler extends ContentHandlerDecorator {
throw new WriteLimitReachedException("Your document contained more than " + writeLimit +
" characters, and so your requested limit has been" +
" reached. To receive the full text of the document," +
- " increase your limit. (Text up to the limit is" + " however available).", tag);
+ " increase your limit. (Text up to the limit is" + " however available).");
}
}
@@ -152,50 +149,11 @@ public class WriteOutContentHandler extends ContentHandlerDecorator {
} else {
super.ignorableWhitespace(ch, start, writeLimit - writeCount);
writeCount = writeLimit;
- throw new WriteLimitReachedException("Your document contained more than " + writeLimit +
+ throw new WriteLimitReachedException("Your document contained more than "
+ + writeLimit +
" characters, and so your requested limit has been" +
" reached. To receive the full text of the document," +
- " increase your limit. (Text up to the limit is" + " however available).", tag);
- }
- }
-
- /**
- * Checks whether the given exception (or any of it's root causes) was
- * thrown by this handler as a signal of reaching the write limit.
- *
- * @param t throwable
- * @return <code>true</code> if the write limit was reached,
- * <code>false</code> otherwise
- * @since Apache Tika 0.7
- */
- public boolean isWriteLimitReached(Throwable t) {
- if (t instanceof WriteLimitReachedException) {
- return tag.equals(((WriteLimitReachedException) t).tag);
- } else {
- return t.getCause() != null && isWriteLimitReached(t.getCause());
+ " increase your limit. (Text up to the limit is however available).");
}
}
-
- /**
- * The exception used as a signal when the write limit has been reached.
- */
- private static class WriteLimitReachedException extends SAXException {
-
- /**
- * Serial version UID
- */
- private static final long serialVersionUID = -1850581945459429943L;
-
- /**
- * Serializable tag of the handler that caused this exception
- */
- private final Serializable tag;
-
- public WriteLimitReachedException(String message, Serializable tag) {
- super(message);
- this.tag = tag;
- }
-
- }
-
}
diff --git a/tika-core/src/test/java/org/apache/tika/sax/BasicContentHandlerFactoryTest.java b/tika-core/src/test/java/org/apache/tika/sax/BasicContentHandlerFactoryTest.java
index 69c4ebb..c5b745c 100644
--- a/tika-core/src/test/java/org/apache/tika/sax/BasicContentHandlerFactoryTest.java
+++ b/tika-core/src/test/java/org/apache/tika/sax/BasicContentHandlerFactoryTest.java
@@ -35,6 +35,7 @@ import org.xml.sax.helpers.AttributesImpl;
import org.xml.sax.helpers.DefaultHandler;
import org.apache.tika.exception.TikaException;
+import org.apache.tika.exception.WriteLimitReachedException;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.ParseContext;
@@ -269,7 +270,7 @@ public class BasicContentHandlerFactoryTest {
try {
p.parse(null, handler, null, null);
} catch (SAXException e) {
- if (!handler.isWriteLimitReached(e)) {
+ if (!WriteLimitReachedException.isWriteLimitReached(e)) {
throw e;
}
wlr = true;
@@ -283,8 +284,7 @@ public class BasicContentHandlerFactoryTest {
try {
p.parse(null, handler, null, null);
} catch (SAXException e) {
- if (!e.getClass().toString().contains(
- "org.apache.tika.sax.WriteOutContentHandler$WriteLimitReachedException")) {
+ if (! WriteLimitReachedException.isWriteLimitReached(e)) {
throw e;
}
diff --git a/tika-server/tika-server-classic/src/test/java/org/apache/tika/server/classic/TikaResourceTest.java b/tika-server/tika-server-classic/src/test/java/org/apache/tika/server/classic/TikaResourceTest.java
index e0855cb..a8bb8d3 100644
--- a/tika-server/tika-server-classic/src/test/java/org/apache/tika/server/classic/TikaResourceTest.java
+++ b/tika-server/tika-server-classic/src/test/java/org/apache/tika/server/classic/TikaResourceTest.java
@@ -597,7 +597,8 @@ public class TikaResourceTest extends CXFTestBase {
assertNotFound("declare the causes", metadata.get(TikaCoreProperties.TIKA_CONTENT));
assertEquals("Microsoft Office Word", metadata.get(OfficeOpenXMLExtended.APPLICATION));
assertTrue(metadata.get(TikaCoreProperties.CONTAINER_EXCEPTION).startsWith(
- "org.apache.tika.sax.WriteOutContentHandler$WriteLimitReachedException"));
+ "org.apache.tika.exception.WriteLimitReachedException"
+ ));
assertNotFound("embed4.txt", metadata.get(TikaCoreProperties.TIKA_CONTENT));
}
diff --git a/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/TikaResourceTest.java b/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/TikaResourceTest.java
index b058db2..fc392cb 100644
--- a/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/TikaResourceTest.java
+++ b/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/TikaResourceTest.java
@@ -169,7 +169,7 @@ public class TikaResourceTest extends CXFTestBase {
assertContains("Hello world", metadata.get(TikaCoreProperties.TIKA_CONTENT));
assertNotFound("dissolve", metadata.get(TikaCoreProperties.TIKA_CONTENT));
assertTrue(metadata.get(TikaCoreProperties.CONTAINER_EXCEPTION).startsWith(
- "org.apache.tika.sax.WriteOutContentHandler$WriteLimitReachedException"
+ "org.apache.tika.exception.WriteLimitReachedException"
));
}