You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2022/06/08 19:58:47 UTC

[tika] branch main updated: TIKA-3787 -- allow parse to continue after writelimit has been reached

This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git


The following commit(s) were added to refs/heads/main by this push:
     new 7c93ddf7e TIKA-3787 -- allow parse to continue after writelimit has been reached
7c93ddf7e is described below

commit 7c93ddf7e3183fcbd811e04c1621455d961b1bb5
Author: tallison <ta...@apache.org>
AuthorDate: Wed Jun 8 15:58:40 2022 -0400

    TIKA-3787 -- allow parse to continue after writelimit has been reached
---
 CHANGES.txt                                        |  3 +
 .../apache/tika/metadata/TikaCoreProperties.java   |  4 ++
 .../org/apache/tika/parser/CompositeParser.java    | 41 +++++------
 .../java/org/apache/tika/parser/ParseRecord.java   | 83 ++++++++++++++++++++++
 .../apache/tika/parser/RecursiveParserWrapper.java | 54 +++++++++++---
 .../java/org/apache/tika/pipes/HandlerConfig.java  | 20 ++++--
 .../tika/pipes/pipesiterator/PipesIterator.java    |  3 +-
 .../tika/sax/BasicContentHandlerFactory.java       | 78 +++++++++++++-------
 .../java/org/apache/tika/sax/WriteLimiter.java     | 22 ++++++
 .../apache/tika/sax/WriteOutContentHandler.java    | 51 +++++++++++--
 .../apache/tika/parser/AutoDetectParserTest.java   | 46 ++++++++++++
 .../tika/parser/RecursiveParserWrapperTest.java    | 29 +++++++-
 .../metadata/serialization/JsonFetchEmitTuple.java |  4 +-
 .../serialization/JsonFetchEmitTupleTest.java      |  4 +-
 .../core/resource/RecursiveMetadataResource.java   |  2 +-
 .../tika/server/core/resource/TikaResource.java    | 20 +++++-
 .../org/apache/tika/server/core/TikaPipesTest.java |  2 +-
 .../apache/tika/server/core/TikaResourceTest.java  | 17 +++++
 .../standard/RecursiveMetadataResourceTest.java    | 37 +++++++++-
 .../apache/tika/server/standard/TikaPipesTest.java |  2 +-
 .../tika/server/standard/TikaResourceTest.java     | 21 +++++-
 21 files changed, 464 insertions(+), 79 deletions(-)

diff --git a/CHANGES.txt b/CHANGES.txt
index 8696143f2..cb76c07f2 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1,5 +1,8 @@
 Release 2.4.1 - ???
 
+   * Allow continuation of parsing after write limit has
+     been reached (TIKA-3787).
+
    * Allow pass-through of 'Content-Length' header to metadata
      in TikaResource (TIKA-3786).
 
diff --git a/tika-core/src/main/java/org/apache/tika/metadata/TikaCoreProperties.java b/tika-core/src/main/java/org/apache/tika/metadata/TikaCoreProperties.java
index 21581a482..c4035ea31 100644
--- a/tika-core/src/main/java/org/apache/tika/metadata/TikaCoreProperties.java
+++ b/tika-core/src/main/java/org/apache/tika/metadata/TikaCoreProperties.java
@@ -76,6 +76,10 @@ public interface TikaCoreProperties {
     Property EMBEDDED_EXCEPTION =
             Property.internalText(TIKA_META_EXCEPTION_PREFIX + "embedded_exception");
 
+    //warning raised while parsing an embedded file
+    Property EMBEDDED_WARNING =
+            Property.internalText(TIKA_META_EXCEPTION_PREFIX + "embedded_warning");
+
     Property WRITE_LIMIT_REACHED =
             Property.internalBoolean(TIKA_META_EXCEPTION_PREFIX + "write_limit_reached");
     /**
diff --git a/tika-core/src/main/java/org/apache/tika/parser/CompositeParser.java b/tika-core/src/main/java/org/apache/tika/parser/CompositeParser.java
index 60dfa3d97..29546546b 100644
--- a/tika-core/src/main/java/org/apache/tika/parser/CompositeParser.java
+++ b/tika-core/src/main/java/org/apache/tika/parser/CompositeParser.java
@@ -23,7 +23,6 @@ import java.util.Arrays;
 import java.util.Collection;
 import java.util.Collections;
 import java.util.HashMap;
-import java.util.LinkedHashSet;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
@@ -40,6 +39,7 @@ import org.apache.tika.metadata.TikaCoreProperties;
 import org.apache.tika.mime.MediaType;
 import org.apache.tika.mime.MediaTypeRegistry;
 import org.apache.tika.sax.TaggedContentHandler;
+import org.apache.tika.utils.ExceptionUtils;
 import org.apache.tika.utils.ParserUtils;
 
 /**
@@ -281,17 +281,17 @@ public class CompositeParser extends AbstractParser {
                       ParseContext context) throws IOException, SAXException, TikaException {
         Parser parser = getParser(metadata, context);
         TemporaryResources tmp = new TemporaryResources();
-        ParserRecord parserRecord = context.get(ParserRecord.class);
+        ParseRecord parserRecord = context.get(ParseRecord.class);
         if (parserRecord == null) {
-            parserRecord = new ParserRecord();
-            context.set(ParserRecord.class, parserRecord);
+            parserRecord = new ParseRecord();
+            context.set(ParseRecord.class, parserRecord);
         }
         try {
             TikaInputStream taggedStream = TikaInputStream.get(stream, tmp);
             TaggedContentHandler taggedHandler =
                     handler != null ? new TaggedContentHandler(handler) : null;
             String parserClassname = ParserUtils.getParserClassname(parser);
-            parserRecord.add(parserClassname);
+            parserRecord.addParserClass(parserClassname);
             ParserUtils.recordParserDetails(parserClassname, metadata);
             parserRecord.beforeParse();
             try {
@@ -316,32 +316,25 @@ public class CompositeParser extends AbstractParser {
             parserRecord.afterParse();
             if (parserRecord.getDepth() == 0) {
                 metadata.set(TikaCoreProperties.TIKA_PARSED_BY_FULL_SET, parserRecord.getParsers());
+                recordEmbeddedMetadata(metadata, context);
             }
         }
     }
 
-    private static class ParserRecord {
-        int depth = 0;
-        Set<String> parsers = new LinkedHashSet<>();
-
-        void beforeParse() {
-            depth++;
-        }
-
-        void afterParse() {
-            depth--;
+    private void recordEmbeddedMetadata(Metadata metadata, ParseContext context) {
+        ParseRecord record = context.get(ParseRecord.class);
+        if (record == null) {
+            //this should never happen
+            return;
         }
-
-        int getDepth() {
-            return depth;
+        for (Exception e : record.getExceptions()) {
+            metadata.add(TikaCoreProperties.EMBEDDED_EXCEPTION, ExceptionUtils.getStackTrace(e));
         }
-
-        String[] getParsers() {
-            return parsers.toArray(new String[0]);
+        for (String msg : record.getWarnings()) {
+            metadata.add(TikaCoreProperties.EMBEDDED_WARNING, msg);
         }
-
-        void add(String parserClass) {
-            parsers.add(parserClass);
+        if (record.isWriteLimitReached()) {
+            metadata.set(TikaCoreProperties.WRITE_LIMIT_REACHED, true);
         }
     }
 }
diff --git a/tika-core/src/main/java/org/apache/tika/parser/ParseRecord.java b/tika-core/src/main/java/org/apache/tika/parser/ParseRecord.java
new file mode 100644
index 000000000..081c01920
--- /dev/null
+++ b/tika-core/src/main/java/org/apache/tika/parser/ParseRecord.java
@@ -0,0 +1,83 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser;
+
+import java.util.ArrayList;
+import java.util.LinkedHashSet;
+import java.util.List;
+import java.util.Set;
+
+/**
+ * Use this class to store exceptions, warnings and other information
+ * during the parse.  This information is added to the parent's metadata
+ * after the parse by the {@link CompositeParser}.
+ */
+public class ParseRecord {
+    private int depth = 0;
+    private final Set<String> parsers = new LinkedHashSet<>();
+
+    private final List<Exception> exceptions = new ArrayList<>();
+
+    private final List<String> warnings = new ArrayList<>();
+
+    private boolean writeLimitReached = false;
+
+    void beforeParse() {
+        depth++;
+    }
+
+    void afterParse() {
+        depth--;
+    }
+
+    public int getDepth() {
+        return depth;
+    }
+
+    public String[] getParsers() {
+        return parsers.toArray(new String[0]);
+    }
+
+    void addParserClass(String parserClass) {
+        parsers.add(parserClass);
+    }
+
+    public void addException(Exception e) {
+        exceptions.add(e);
+    }
+
+    public void addWarning(String msg) {
+        warnings.add(msg);
+    }
+
+    public void setWriteLimitReached(boolean writeLimitReached) {
+        this.writeLimitReached = writeLimitReached;
+    }
+
+    public List<Exception> getExceptions() {
+        return exceptions;
+    }
+
+    public List<String> getWarnings() {
+        return warnings;
+    }
+
+
+    public boolean isWriteLimitReached() {
+        return writeLimitReached;
+    }
+}
diff --git a/tika-core/src/main/java/org/apache/tika/parser/RecursiveParserWrapper.java b/tika-core/src/main/java/org/apache/tika/parser/RecursiveParserWrapper.java
index 2d1b6c5fb..59db9b3f9 100644
--- a/tika-core/src/main/java/org/apache/tika/parser/RecursiveParserWrapper.java
+++ b/tika-core/src/main/java/org/apache/tika/parser/RecursiveParserWrapper.java
@@ -35,9 +35,10 @@ import org.apache.tika.metadata.Metadata;
 import org.apache.tika.metadata.TikaCoreProperties;
 import org.apache.tika.mime.MediaType;
 import org.apache.tika.sax.AbstractRecursiveParserWrapperHandler;
-import org.apache.tika.sax.BasicContentHandlerFactory;
+import org.apache.tika.sax.ContentHandlerFactory;
 import org.apache.tika.sax.RecursiveParserWrapperHandler;
 import org.apache.tika.sax.SecureContentHandler;
+import org.apache.tika.sax.WriteLimiter;
 import org.apache.tika.utils.ExceptionUtils;
 import org.apache.tika.utils.ParserUtils;
 
@@ -143,16 +144,21 @@ public class RecursiveParserWrapper extends ParserDecorator {
         parserState.recursiveParserWrapperHandler.startDocument();
         TemporaryResources tmp = new TemporaryResources();
         int writeLimit = -1;
-        //TODO -- rely on a new interface WriteLimiting...?
-        //It'd be better not to tie this to a specific class
-        if (recursiveParserWrapperHandler instanceof BasicContentHandlerFactory) { // TODO this cond is always false
-            writeLimit =
-                    ((BasicContentHandlerFactory)recursiveParserWrapperHandler).getWriteLimit();
+        boolean throwOnWriteLimitReached = true;
+
+        if (recursiveParserWrapperHandler instanceof AbstractRecursiveParserWrapperHandler) {
+            ContentHandlerFactory factory =
+                    ((AbstractRecursiveParserWrapperHandler)recursiveParserWrapperHandler).getContentHandlerFactory();
+            if (factory instanceof WriteLimiter) {
+                writeLimit = ((WriteLimiter)factory).getWriteLimit();
+                throwOnWriteLimitReached = ((WriteLimiter)factory).isThrowOnWriteLimitReached();
+            }
         }
         try {
             TikaInputStream tis = TikaInputStream.get(stream, tmp);
             RecursivelySecureContentHandler secureContentHandler =
-                    new RecursivelySecureContentHandler(localHandler, tis, writeLimit);
+                    new RecursivelySecureContentHandler(localHandler, tis, writeLimit,
+                            throwOnWriteLimitReached, context);
             context.set(RecursivelySecureContentHandler.class, secureContentHandler);
             getWrappedParser().parse(tis, secureContentHandler, metadata, context);
         } catch (Throwable e) {
@@ -287,13 +293,22 @@ public class RecursiveParserWrapper extends ParserDecorator {
         //total allowable chars across all handlers
         private final int totalWriteLimit;
 
+        private final boolean throwOnWriteLimitReached;
+
+        private final ParseContext parseContext;
+
+        private boolean writeLimitReached = false;
+
         //total chars written to all handlers
         private int totalChars = 0;
         public RecursivelySecureContentHandler(ContentHandler handler, TikaInputStream stream,
-                                               int totalWriteLimit) {
+                                               int totalWriteLimit,
+                                               boolean throwOnWriteLimitReached, ParseContext parseContext) {
             super(handler, stream);
             this.handler = handler;
             this.totalWriteLimit = totalWriteLimit;
+            this.throwOnWriteLimitReached = throwOnWriteLimitReached;
+            this.parseContext = parseContext;
         }
 
         public void updateContentHandler(ContentHandler handler) {
@@ -328,6 +343,10 @@ public class RecursiveParserWrapper extends ParserDecorator {
 
         @Override
         public void characters(char[] ch, int start, int length) throws SAXException {
+            if (writeLimitReached) {
+                return;
+            }
+
             if (totalWriteLimit < 0) {
                 super.characters(ch, start, length);
                 return;
@@ -335,12 +354,16 @@ public class RecursiveParserWrapper extends ParserDecorator {
             int availableLength = Math.min(totalWriteLimit - totalChars, length);
             super.characters(ch, start, availableLength);
             if (availableLength < length) {
-                throw new WriteLimitReachedException(totalWriteLimit);
+                handleWriteLimitReached();
             }
         }
 
         @Override
         public void ignorableWhitespace(char[] ch, int start, int length) throws SAXException {
+            if (writeLimitReached) {
+                return;
+            }
+
             if (totalWriteLimit < 0) {
                 super.ignorableWhitespace(ch, start, length);
                 return;
@@ -348,7 +371,20 @@ public class RecursiveParserWrapper extends ParserDecorator {
             int availableLength = Math.min(totalWriteLimit - totalChars, length);
             super.ignorableWhitespace(ch, start, availableLength);
             if (availableLength < length) {
+                handleWriteLimitReached();
+            }
+        }
+
+        private void handleWriteLimitReached() throws WriteLimitReachedException {
+            writeLimitReached = true;
+
+            if (throwOnWriteLimitReached) {
                 throw new WriteLimitReachedException(totalWriteLimit);
+            } else {
+                ParseRecord parseRecord = parseContext.get(ParseRecord.class);
+                if (parseRecord != null) {
+                    parseRecord.setWriteLimitReached(true);
+                }
             }
         }
     }
diff --git a/tika-core/src/main/java/org/apache/tika/pipes/HandlerConfig.java b/tika-core/src/main/java/org/apache/tika/pipes/HandlerConfig.java
index a73e2290b..d128dcb3d 100644
--- a/tika-core/src/main/java/org/apache/tika/pipes/HandlerConfig.java
+++ b/tika-core/src/main/java/org/apache/tika/pipes/HandlerConfig.java
@@ -31,7 +31,7 @@ public class HandlerConfig implements Serializable {
 
     public static final HandlerConfig DEFAULT_HANDLER_CONFIG =
             new HandlerConfig(BasicContentHandlerFactory.HANDLER_TYPE.TEXT, PARSE_MODE.RMETA,
-                    -1, -1);
+                    -1, -1, true);
 
     /**
      * {@link PARSE_MODE#RMETA} "recursive metadata" is the same as the -J option
@@ -73,16 +73,19 @@ public class HandlerConfig implements Serializable {
 
     int writeLimit = -1;
     int maxEmbeddedResources = -1;
+
+    boolean throwOnWriteLimitReached = true;
     PARSE_MODE parseMode = PARSE_MODE.RMETA;
 
 
     public HandlerConfig(BasicContentHandlerFactory.HANDLER_TYPE type, PARSE_MODE parseMode,
                          int writeLimit,
-                         int maxEmbeddedResources) {
+                         int maxEmbeddedResources, boolean throwOnWriteLimitReached) {
         this.type = type;
         this.parseMode = parseMode;
         this.writeLimit = writeLimit;
         this.maxEmbeddedResources = maxEmbeddedResources;
+        this.throwOnWriteLimitReached = throwOnWriteLimitReached;
     }
 
     public BasicContentHandlerFactory.HANDLER_TYPE getType() {
@@ -101,6 +104,10 @@ public class HandlerConfig implements Serializable {
         return parseMode;
     }
 
+    public boolean isThrowOnWriteLimitReached() {
+        return throwOnWriteLimitReached;
+    }
+
     @Override
     public boolean equals(Object o) {
         if (this == o) {
@@ -111,17 +118,20 @@ public class HandlerConfig implements Serializable {
         }
         HandlerConfig that = (HandlerConfig) o;
         return writeLimit == that.writeLimit && maxEmbeddedResources == that.maxEmbeddedResources &&
-                type == that.type && parseMode == that.parseMode;
+                throwOnWriteLimitReached == that.throwOnWriteLimitReached && type == that.type &&
+                parseMode == that.parseMode;
     }
 
     @Override
     public int hashCode() {
-        return Objects.hash(type, writeLimit, maxEmbeddedResources, parseMode);
+        return Objects.hash(type, writeLimit, maxEmbeddedResources, throwOnWriteLimitReached,
+                parseMode);
     }
 
     @Override
     public String toString() {
         return "HandlerConfig{" + "type=" + type + ", writeLimit=" + writeLimit +
-                ", maxEmbeddedResources=" + maxEmbeddedResources + ", mode=" + parseMode + '}';
+                ", maxEmbeddedResources=" + maxEmbeddedResources + ", throwOnWriteLimitReached=" +
+                throwOnWriteLimitReached + ", parseMode=" + parseMode + '}';
     }
 }
diff --git a/tika-core/src/main/java/org/apache/tika/pipes/pipesiterator/PipesIterator.java b/tika-core/src/main/java/org/apache/tika/pipes/pipesiterator/PipesIterator.java
index 42272743e..98b766ce7 100644
--- a/tika-core/src/main/java/org/apache/tika/pipes/pipesiterator/PipesIterator.java
+++ b/tika-core/src/main/java/org/apache/tika/pipes/pipesiterator/PipesIterator.java
@@ -167,7 +167,8 @@ public abstract class PipesIterator extends ConfigBase
     }
 
     protected HandlerConfig getHandlerConfig() {
-        return new HandlerConfig(handlerType, parseMode, writeLimit, maxEmbeddedResources);
+        //TODO: make throwOnWriteLimitReached configurable
+        return new HandlerConfig(handlerType, parseMode, writeLimit, maxEmbeddedResources, false);
     }
 
     protected abstract void enqueue() throws IOException, TimeoutException, InterruptedException;
diff --git a/tika-core/src/main/java/org/apache/tika/sax/BasicContentHandlerFactory.java b/tika-core/src/main/java/org/apache/tika/sax/BasicContentHandlerFactory.java
index 9bc5da41f..9de0d4071 100644
--- a/tika-core/src/main/java/org/apache/tika/sax/BasicContentHandlerFactory.java
+++ b/tika-core/src/main/java/org/apache/tika/sax/BasicContentHandlerFactory.java
@@ -26,22 +26,50 @@ import java.util.Locale;
 import org.xml.sax.ContentHandler;
 import org.xml.sax.helpers.DefaultHandler;
 
+import org.apache.tika.parser.ParseContext;
+
 /**
  * Basic factory for creating common types of ContentHandlers
  */
-public class BasicContentHandlerFactory implements ContentHandlerFactory {
+public class BasicContentHandlerFactory implements ContentHandlerFactory, WriteLimiter {
 
     private final HANDLER_TYPE type;
     private final int writeLimit;
 
+    private final boolean throwOnWriteLimitReached;
+
+    private final ParseContext parseContext;
+
     /**
      * @param type       basic type of handler
      * @param writeLimit max number of characters to store; if < 0,
      *                   the handler will store all characters
      */
     public BasicContentHandlerFactory(HANDLER_TYPE type, int writeLimit) {
+        this(type, writeLimit, true, null);
+    }
+
+    /**
+     *
+     * @param type basic type of handler
+     * @param writeLimit maximum number of characters to store
+     * @param throwOnWriteLimitReached whether or not to throw a
+     *          {@link org.apache.tika.exception.WriteLimitReachedException}
+     *                                 when the write limit has been reached
+     * @param parseContext to store the writelimitreached warning if
+     *                 throwOnWriteLimitReached is set to <code>false</code>
+     */
+    public BasicContentHandlerFactory(HANDLER_TYPE type, int writeLimit,
+                                      boolean throwOnWriteLimitReached, ParseContext parseContext) {
         this.type = type;
         this.writeLimit = writeLimit;
+        this.throwOnWriteLimitReached = throwOnWriteLimitReached;
+        this.parseContext = parseContext;
+        if (throwOnWriteLimitReached == false && parseContext == null) {
+            throw new IllegalArgumentException("parse context must not be null if " +
+                    "throwOnWriteLimitReached is false");
+        }
+
     }
 
     /**
@@ -82,33 +110,30 @@ public class BasicContentHandlerFactory implements ContentHandlerFactory {
     public ContentHandler getNewContentHandler() {
 
         if (type == HANDLER_TYPE.BODY) {
-            return new BodyContentHandler(writeLimit);
+            return new BodyContentHandler(
+                    new WriteOutContentHandler(new ToTextContentHandler(), writeLimit,
+                    throwOnWriteLimitReached, parseContext));
         } else if (type == HANDLER_TYPE.IGNORE) {
             return new DefaultHandler();
         }
-        if (writeLimit > -1) {
-            switch (type) {
-                case TEXT:
-                    return new WriteOutContentHandler(new ToTextContentHandler(), writeLimit);
-                case HTML:
-                    return new WriteOutContentHandler(new ToHTMLContentHandler(), writeLimit);
-                case XML:
-                    return new WriteOutContentHandler(new ToXMLContentHandler(), writeLimit);
-                default:
-                    return new WriteOutContentHandler(new ToTextContentHandler(), writeLimit);
-            }
-        } else {
-            switch (type) {
-                case TEXT:
-                    return new ToTextContentHandler();
-                case HTML:
-                    return new ToHTMLContentHandler();
-                case XML:
-                    return new ToXMLContentHandler();
-                default:
-                    return new ToTextContentHandler();
+        ContentHandler formatHandler = getFormatHandler();
+        if (writeLimit < 0) {
+            return formatHandler;
+        }
+        return new WriteOutContentHandler(formatHandler, writeLimit, throwOnWriteLimitReached,
+                parseContext);
+    }
 
-            }
+    private ContentHandler getFormatHandler() {
+        switch (type) {
+            case TEXT:
+                return new ToTextContentHandler();
+            case HTML:
+                return new ToHTMLContentHandler();
+            case XML:
+                return new ToXMLContentHandler();
+            default:
+                return new ToTextContentHandler();
         }
     }
 
@@ -182,4 +207,9 @@ public class BasicContentHandlerFactory implements ContentHandlerFactory {
     public int getWriteLimit() {
         return writeLimit;
     }
+
+    @Override
+    public boolean isThrowOnWriteLimitReached() {
+        return throwOnWriteLimitReached;
+    }
 }
diff --git a/tika-core/src/main/java/org/apache/tika/sax/WriteLimiter.java b/tika-core/src/main/java/org/apache/tika/sax/WriteLimiter.java
new file mode 100644
index 000000000..d82895a1b
--- /dev/null
+++ b/tika-core/src/main/java/org/apache/tika/sax/WriteLimiter.java
@@ -0,0 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.sax;
+
+public interface WriteLimiter {
+    int getWriteLimit();
+    boolean isThrowOnWriteLimitReached();
+}
diff --git a/tika-core/src/main/java/org/apache/tika/sax/WriteOutContentHandler.java b/tika-core/src/main/java/org/apache/tika/sax/WriteOutContentHandler.java
index 87a31b973..672a8bf03 100644
--- a/tika-core/src/main/java/org/apache/tika/sax/WriteOutContentHandler.java
+++ b/tika-core/src/main/java/org/apache/tika/sax/WriteOutContentHandler.java
@@ -26,6 +26,8 @@ import org.xml.sax.ContentHandler;
 import org.xml.sax.SAXException;
 
 import org.apache.tika.exception.WriteLimitReachedException;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.parser.ParseRecord;
 
 /**
  * SAX event handler that writes content up to an optional write
@@ -45,6 +47,12 @@ public class WriteOutContentHandler extends ContentHandlerDecorator {
      */
     private int writeCount = 0;
 
+    private boolean throwOnWriteLimitReached = true;
+
+    private ParseContext parseContext = null;
+
+    private boolean writeLimitReached;
+
     /**
      * Creates a content handler that writes content up to the given
      * write limit to the given content handler.
@@ -118,37 +126,70 @@ public class WriteOutContentHandler extends ContentHandlerDecorator {
      * The internal string buffer is bounded at 100k characters. If this
      * write limit is reached, then a {@link SAXException} is thrown. The
      * {@link WriteLimitReachedException#isWriteLimitReached(Throwable)} method can be used to
-     * detect
-     * this case.
+     * detect this case.
      */
     public WriteOutContentHandler() {
         this(100 * 1000);
     }
 
+    /**
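+     * Wraps a handler and stops writing once the write limit has been reached.
+     * @param handler handler that receives the content
+     * @param writeLimit maximum number of characters to write, or -1 for no limit
+     * @param throwOnWriteLimitReached whether to throw a {@link WriteLimitReachedException}
+     * @param parseContext context whose {@link ParseRecord} is flagged when not throwing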
+     */
+    public WriteOutContentHandler(ContentHandler handler,
+                                  int writeLimit, boolean throwOnWriteLimitReached,
+                                  ParseContext parseContext) {
+        super(handler);
+        this.writeLimit = writeLimit;
+        this.throwOnWriteLimitReached = throwOnWriteLimitReached;
+        this.parseContext = parseContext;
+    }
+
     /**
      * Writes the given characters to the given character stream.
      */
     @Override
     public void characters(char[] ch, int start, int length) throws SAXException {
+        if (writeLimitReached) {
+            return;
+        }
         if (writeLimit == -1 || writeCount + length <= writeLimit) {
             super.characters(ch, start, length);
             writeCount += length;
         } else {
             super.characters(ch, start, writeLimit - writeCount);
-            writeCount = writeLimit;
-            throw new WriteLimitReachedException(writeLimit);
+            handleWriteLimitReached();
         }
     }
 
     @Override
     public void ignorableWhitespace(char[] ch, int start, int length) throws SAXException {
+        if (writeLimitReached) {
+            return;
+        }
         if (writeLimit == -1 || writeCount + length <= writeLimit) {
             super.ignorableWhitespace(ch, start, length);
             writeCount += length;
         } else {
             super.ignorableWhitespace(ch, start, writeLimit - writeCount);
-            writeCount = writeLimit;
+            handleWriteLimitReached();
+        }
+    }
+
+    private void handleWriteLimitReached() throws WriteLimitReachedException {
+        writeLimitReached = true;
+        writeCount = writeLimit;
+        if (throwOnWriteLimitReached) {
             throw new WriteLimitReachedException(writeLimit);
+        } else {
+            ParseRecord parseRecord = parseContext.get(ParseRecord.class);
+            if (parseRecord != null) {
+                parseRecord.setWriteLimitReached(true);
+            }
         }
     }
+
 }
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/parser/AutoDetectParserTest.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/parser/AutoDetectParserTest.java
index 8e81d603d..ec3598d8d 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/parser/AutoDetectParserTest.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/parser/AutoDetectParserTest.java
@@ -41,6 +41,7 @@ import org.apache.tika.TikaTest;
 import org.apache.tika.config.TikaConfig;
 import org.apache.tika.detect.Detector;
 import org.apache.tika.exception.TikaException;
+import org.apache.tika.exception.WriteLimitReachedException;
 import org.apache.tika.exception.ZeroByteFileException;
 import org.apache.tika.io.TikaInputStream;
 import org.apache.tika.metadata.Metadata;
@@ -49,6 +50,8 @@ import org.apache.tika.metadata.XMPDM;
 import org.apache.tika.mime.MediaType;
 import org.apache.tika.parser.external.CompositeExternalParser;
 import org.apache.tika.sax.BodyContentHandler;
+import org.apache.tika.sax.ToXMLContentHandler;
+import org.apache.tika.sax.WriteOutContentHandler;
 
 public class AutoDetectParserTest extends TikaTest {
     // Easy to read constants for the MIME types:
@@ -403,6 +406,49 @@ public class AutoDetectParserTest extends TikaTest {
         assertNotNull(p);
     }
 
+    @Test
+    public void testWriteLimit() throws Exception {
+        ContentHandler handler = new WriteOutContentHandler(500);
+        Metadata metadata = new Metadata();
+        ParseContext parseContext = new ParseContext();
+        try (InputStream stream =
+                    getResourceAsStream("/test-documents/test_recursive_embedded.docx")) {
+            AUTO_DETECT_PARSER.parse(stream, handler, metadata, parseContext);
+            fail("write limit reached should have percolated to here");
+        } catch (WriteLimitReachedException e) {
+            //expected
+        }
+        String txt = handler.toString();
+        //test that the writelimit does intervene between these two
+        //pieces of text and that the first is there, but the second isn't
+        assertContains("assume among the powers", txt);
+        assertNotContained("unalienable Rights", txt);
+        //test that text from other embedded files after this one is not processed
+        assertNotContained("embed_4", txt);
+    }
+
+    @Test
+    public void testWriteLimitNoThrow() throws Exception {
+        ParseContext parseContext = new ParseContext();
+        ContentHandler handler = new WriteOutContentHandler(new ToXMLContentHandler(),
+                500, false, parseContext);
+        Metadata metadata = new Metadata();
+        try (InputStream stream =
+                    getResourceAsStream("/test-documents/test_recursive_embedded.docx")) {
+            AUTO_DETECT_PARSER.parse(stream, handler, metadata, parseContext);
+        }
+        String txt = handler.toString();
+        assertEquals("true", metadata.get(TikaCoreProperties.WRITE_LIMIT_REACHED));
+        //test that the writelimit does intervene between these two
+        //pieces of text and that the first is there, but the second isn't
+        assertContains("assume among the powers", txt);
+        assertNotContained("unalienable Rights", txt);
+        //test that text from other embedded files after this one is not processed,
+        //but that the entry is there for the embedded file, i.e. the parse continued
+        assertContains("id=\"embed4.txt\"", txt);
+        assertNotContained("embed_4", txt);
+    }
+
     //This is not the complete/correct way to look for parsers within another parser
     //However, it is good enough for this unit test for now.
     private Parser find(CompositeParser parser, Class clazz) {
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/parser/RecursiveParserWrapperTest.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/parser/RecursiveParserWrapperTest.java
index 847e5d129..03461d5f3 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/parser/RecursiveParserWrapperTest.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/parser/RecursiveParserWrapperTest.java
@@ -93,10 +93,12 @@ public class RecursiveParserWrapperTest extends TikaTest {
         Metadata metadata = new Metadata();
 
         RecursiveParserWrapper wrapper = new RecursiveParserWrapper(AUTO_DETECT_PARSER);
-        InputStream stream = getResourceAsStream("/test-documents/test_recursive_embedded.docx");
         RecursiveParserWrapperHandler handler = new RecursiveParserWrapperHandler(
                 new BasicContentHandlerFactory(BasicContentHandlerFactory.HANDLER_TYPE.TEXT, 70));
-        wrapper.parse(stream, handler, metadata, context);
+        try (InputStream stream =
+                    getResourceAsStream("/test-documents/test_recursive_embedded.docx")) {
+            wrapper.parse(stream, handler, metadata, context);
+        }
         List<Metadata> list = handler.getMetadataList();
 
         assertEquals(5, list.size());
@@ -111,6 +113,29 @@ public class RecursiveParserWrapperTest extends TikaTest {
         assertEquals(2, wlr);
     }
 
+    @Test
+    public void testCharLimitNoThrowOnWriteLimit() throws Exception {
+        ParseContext context = new ParseContext();
+        Metadata metadata = new Metadata();
+
+        RecursiveParserWrapper wrapper = new RecursiveParserWrapper(AUTO_DETECT_PARSER);
+        RecursiveParserWrapperHandler handler = new RecursiveParserWrapperHandler(
+                new BasicContentHandlerFactory(BasicContentHandlerFactory.HANDLER_TYPE.TEXT, 500,
+                        false, context)); // write limit of 500; false = do not throw at the limit
+        try (InputStream stream = getResourceAsStream("/test-documents/test_recursive_embedded" +
+                ".docx")) {
+            wrapper.parse(stream, handler, metadata, context);
+        }
+        List<Metadata> list = handler.getMetadataList();
+
+        assertEquals(12, list.size()); // one Metadata per document; all 12 are expected here
+
+        assertEquals("true", list.get(0).get(TikaCoreProperties.WRITE_LIMIT_REACHED)); // flag is on index 0
+
+        assertContains("them to the separation", list.get(6).get(TikaCoreProperties.TIKA_CONTENT));
+        assertNotContained("unalienable Rights", // this later text must be absent from index 6
+                list.get(6).get(TikaCoreProperties.TIKA_CONTENT));
+    }
 
     @Test
     public void testMaxEmbedded() throws Exception {
diff --git a/tika-serialization/src/main/java/org/apache/tika/metadata/serialization/JsonFetchEmitTuple.java b/tika-serialization/src/main/java/org/apache/tika/metadata/serialization/JsonFetchEmitTuple.java
index 73c0737fb..714610786 100644
--- a/tika-serialization/src/main/java/org/apache/tika/metadata/serialization/JsonFetchEmitTuple.java
+++ b/tika-serialization/src/main/java/org/apache/tika/metadata/serialization/JsonFetchEmitTuple.java
@@ -162,7 +162,9 @@ public class JsonFetchEmitTuple {
             }
             fieldName = jParser.nextFieldName();
         }
-        return new HandlerConfig(handlerType, parseMode, writeLimit, maxEmbeddedResources);
+        //TODO: implement configuration of throwOnWriteLimitReached
+        return new HandlerConfig(handlerType, parseMode, writeLimit, maxEmbeddedResources,
+                true);
     }
 
     private static String getValue(JsonParser jParser) throws IOException {
diff --git a/tika-serialization/src/test/java/org/apache/tika/metadata/serialization/JsonFetchEmitTupleTest.java b/tika-serialization/src/test/java/org/apache/tika/metadata/serialization/JsonFetchEmitTupleTest.java
index a95431e54..aeb4fefd4 100644
--- a/tika-serialization/src/test/java/org/apache/tika/metadata/serialization/JsonFetchEmitTupleTest.java
+++ b/tika-serialization/src/test/java/org/apache/tika/metadata/serialization/JsonFetchEmitTupleTest.java
@@ -46,7 +46,7 @@ public class JsonFetchEmitTupleTest {
                 new EmitKey("my_emitter", "emitKey1"), m,
                 new HandlerConfig(BasicContentHandlerFactory.HANDLER_TYPE.XML,
                         HandlerConfig.PARSE_MODE.CONCATENATE,
-                        10000,10),
+                        10000,10, true),
                 FetchEmitTuple.ON_PARSE_EXCEPTION.SKIP);
         StringWriter writer = new StringWriter();
         JsonFetchEmitTuple.toJson(t, writer);
@@ -69,7 +69,7 @@ public class JsonFetchEmitTupleTest {
                 new EmitKey("my_emitter", "emitKey1"), m,
                 new HandlerConfig(BasicContentHandlerFactory.HANDLER_TYPE.XML,
                         HandlerConfig.PARSE_MODE.CONCATENATE,
-                        10000,10),
+                        10000,10, true),
                 FetchEmitTuple.ON_PARSE_EXCEPTION.SKIP);
         StringWriter writer = new StringWriter();
         JsonFetchEmitTuple.toJson(t, writer);
diff --git a/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/RecursiveMetadataResource.java b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/RecursiveMetadataResource.java
index 545d4ae2c..76e24b926 100644
--- a/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/RecursiveMetadataResource.java
+++ b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/RecursiveMetadataResource.java
@@ -141,7 +141,7 @@ public class RecursiveMetadataResource {
         return new HandlerConfig(
                 BasicContentHandlerFactory.parseHandlerType(handlerTypeName, DEFAULT_HANDLER_TYPE),
                 parseMode,
-                writeLimit, maxEmbeddedResources);
+                writeLimit, maxEmbeddedResources, TikaResource.getThrowOnWriteLimitReached(httpHeaders));
     }
 
     /**
diff --git a/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/TikaResource.java b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/TikaResource.java
index 5a60018f2..3d4954df8 100644
--- a/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/TikaResource.java
+++ b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/TikaResource.java
@@ -73,6 +73,7 @@ import org.apache.tika.parser.AutoDetectParser;
 import org.apache.tika.parser.DigestingParser;
 import org.apache.tika.parser.ParseContext;
 import org.apache.tika.parser.Parser;
+import org.apache.tika.pipes.HandlerConfig;
 import org.apache.tika.sax.BasicContentHandlerFactory;
 import org.apache.tika.sax.BodyContentHandler;
 import org.apache.tika.sax.ExpandedTitleContentHandler;
@@ -590,12 +591,15 @@ public class TikaResource {
 
         logRequest(LOG, "/tika", metadata);
         int writeLimit = -1;
+        boolean throwOnWriteLimitReached = getThrowOnWriteLimitReached(httpHeaders);
         if (httpHeaders.containsKey("writeLimit")) {
             writeLimit = Integer.parseInt(httpHeaders.getFirst("writeLimit"));
         }
+
         BasicContentHandlerFactory.HANDLER_TYPE type =
                 BasicContentHandlerFactory.parseHandlerType(handlerTypeName, DEFAULT_HANDLER_TYPE);
-        BasicContentHandlerFactory fact = new BasicContentHandlerFactory(type, writeLimit);
+        BasicContentHandlerFactory fact = new BasicContentHandlerFactory(type, writeLimit,
+                throwOnWriteLimitReached, context);
         ContentHandler contentHandler = fact.getNewContentHandler();
 
         try {
@@ -630,6 +634,20 @@ public class TikaResource {
         }
     }
 
+    public static boolean getThrowOnWriteLimitReached(MultivaluedMap<String, String> httpHeaders) {
+        if (httpHeaders.containsKey("throwOnWriteLimitReached")) { // the header is optional
+            String val = httpHeaders.getFirst("throwOnWriteLimitReached"); // only the first value is read
+            if ("true".equalsIgnoreCase(val)) { // the value is matched case-insensitively
+                return true;
+            } else if ("false".equalsIgnoreCase(val)) {
+                return false;
+            } else { // any other value, including null, is rejected
+                throw new IllegalArgumentException("'throwOnWriteLimitReached' must be either 'true' or 'false'");
+            }
+        }
+        return HandlerConfig.DEFAULT_HANDLER_CONFIG.isThrowOnWriteLimitReached(); // header absent: use the default
+    }
+
     private StreamingOutput produceOutput(final InputStream is, Metadata metadata,
                                           final MultivaluedMap<String, String> httpHeaders,
                                           final UriInfo info, final String format) {
diff --git a/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/TikaPipesTest.java b/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/TikaPipesTest.java
index d760dae1d..b1b73a896 100644
--- a/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/TikaPipesTest.java
+++ b/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/TikaPipesTest.java
@@ -223,7 +223,7 @@ public class TikaPipesTest extends CXFTestBase {
                         new EmitKey("fse", ""),
                         userMetadata,
                         new HandlerConfig(BasicContentHandlerFactory.HANDLER_TYPE.XML,
-                                HandlerConfig.PARSE_MODE.RMETA, -1, -1),
+                                HandlerConfig.PARSE_MODE.RMETA, -1, -1, true),
                         FetchEmitTuple.ON_PARSE_EXCEPTION.EMIT);
         StringWriter writer = new StringWriter();
         JsonFetchEmitTuple.toJson(t, writer);
diff --git a/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/TikaResourceTest.java b/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/TikaResourceTest.java
index eb5222bfe..82474b38a 100644
--- a/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/TikaResourceTest.java
+++ b/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/TikaResourceTest.java
@@ -170,6 +170,23 @@ public class TikaResourceTest extends CXFTestBase {
         assertEquals("true", metadata.get(TikaCoreProperties.WRITE_LIMIT_REACHED));
     }
 
+    @Test
+    public void testNoWriteLimitOnStreamingWrite() throws Exception {
+        //this test shows that the write limit is not applied to text or xhtml
+        //output, or to anything else that does streaming writes
+        Response response = WebClient.create(endPoint + TIKA_PATH).header("writeLimit", "100")
+                .accept("text/plain")
+                .put(ClassLoader.getSystemResourceAsStream(TEST_HELLO_WORLD_LONG));
+        String content = getStringFromInputStream((InputStream) response.getEntity());
+        assertContains("separation.", content);
+
+        response = WebClient.create(endPoint + TIKA_PATH).header("writeLimit", "100")
+                .accept("text/html") // same request again, this time asking for html
+                .put(ClassLoader.getSystemResourceAsStream(TEST_HELLO_WORLD_LONG));
+        content = getStringFromInputStream((InputStream) response.getEntity());
+        assertContains("separation.</p>", content);
+    }
+
     @Test
     public void testJsonHandlerType() throws Exception {
         Response response = WebClient.create(endPoint + TIKA_PATH).accept("application/json")
diff --git a/tika-server/tika-server-standard/src/test/java/org/apache/tika/server/standard/RecursiveMetadataResourceTest.java b/tika-server/tika-server-standard/src/test/java/org/apache/tika/server/standard/RecursiveMetadataResourceTest.java
index 1663fb71d..3de5c0e65 100644
--- a/tika-server/tika-server-standard/src/test/java/org/apache/tika/server/standard/RecursiveMetadataResourceTest.java
+++ b/tika-server/tika-server-standard/src/test/java/org/apache/tika/server/standard/RecursiveMetadataResourceTest.java
@@ -345,7 +345,7 @@ public class RecursiveMetadataResourceTest extends CXFTestBase {
         assertEquals(1, metadataList.size());
         assertEquals("true", metadataList.get(0).get(TikaCoreProperties.WRITE_LIMIT_REACHED));
 
-        //now try with a write limit of 1000
+        //now try with a write limit of 200
         writeLimit = 200;
         response = WebClient.create(endPoint + META_PATH).accept("application/json")
                 .header("writeLimit", Integer.toString(writeLimit))
@@ -378,4 +378,39 @@ public class RecursiveMetadataResourceTest extends CXFTestBase {
         assertEquals("true", metadata.get(TikaCoreProperties.WRITE_LIMIT_REACHED));
     }
 
+    @Test
+    public void testNoThrowOnWriteLimitReached() throws Exception {
+        int writeLimit = 100;
+        Response response = WebClient.create(endPoint + META_PATH).accept("application/json")
+                .header("writeLimit", Integer.toString(writeLimit))
+                .header("throwOnWriteLimitReached", "false")
+                .put(ClassLoader.getSystemResourceAsStream(TEST_RECURSIVE_DOC));
+
+        assertEquals(200, response.getStatus());
+        // Check results: all 10 metadata objects are returned and index 0 carries the flag
+        Reader reader = new InputStreamReader((InputStream) response.getEntity(), UTF_8);
+        List<Metadata> metadataList = JsonMetadataList.fromJson(reader);
+        assertEquals(10, metadataList.size());
+        assertEquals("true", metadataList.get(0).get(TikaCoreProperties.WRITE_LIMIT_REACHED));
+
+        //now try again with a write limit of 200 and the same no-throw header
+        writeLimit = 200;
+        response = WebClient.create(endPoint + META_PATH).accept("application/json")
+                .header("writeLimit", Integer.toString(writeLimit))
+                .header("throwOnWriteLimitReached", "false")
+                .put(ClassLoader.getSystemResourceAsStream(TEST_RECURSIVE_DOC));
+
+        assertEquals(200, response.getStatus());
+        // Check results: still 10 objects; index 6 carries the flag and its content is cut short
+        reader = new InputStreamReader((InputStream) response.getEntity(), UTF_8);
+        metadataList = JsonMetadataList.fromJson(reader);
+        assertEquals(10, metadataList.size());
+        assertEquals("true", metadataList.get(6).get(TikaCoreProperties.WRITE_LIMIT_REACHED));
+        assertContains("When in the Course of human events it becomes necessary for one people",
+                metadataList.get(6).get(TikaCoreProperties.TIKA_CONTENT));
+        TikaTest.assertNotContained("We hold these truths",
+                metadataList.get(6).get(TikaCoreProperties.TIKA_CONTENT));
+
+    }
+
 }
diff --git a/tika-server/tika-server-standard/src/test/java/org/apache/tika/server/standard/TikaPipesTest.java b/tika-server/tika-server-standard/src/test/java/org/apache/tika/server/standard/TikaPipesTest.java
index e48455b43..172600ec0 100644
--- a/tika-server/tika-server-standard/src/test/java/org/apache/tika/server/standard/TikaPipesTest.java
+++ b/tika-server/tika-server-standard/src/test/java/org/apache/tika/server/standard/TikaPipesTest.java
@@ -195,7 +195,7 @@ public class TikaPipesTest extends CXFTestBase {
                         new EmitKey("fse", ""),
                         new Metadata(),
                         new HandlerConfig(BasicContentHandlerFactory.HANDLER_TYPE.TEXT,
-                                HandlerConfig.PARSE_MODE.CONCATENATE, -1, -1000),
+                                HandlerConfig.PARSE_MODE.CONCATENATE, -1, -1000, true),
                         FetchEmitTuple.ON_PARSE_EXCEPTION.EMIT);
         StringWriter writer = new StringWriter();
         JsonFetchEmitTuple.toJson(t, writer);
diff --git a/tika-server/tika-server-standard/src/test/java/org/apache/tika/server/standard/TikaResourceTest.java b/tika-server/tika-server-standard/src/test/java/org/apache/tika/server/standard/TikaResourceTest.java
index a42c80f6f..a427b6e00 100644
--- a/tika-server/tika-server-standard/src/test/java/org/apache/tika/server/standard/TikaResourceTest.java
+++ b/tika-server/tika-server-standard/src/test/java/org/apache/tika/server/standard/TikaResourceTest.java
@@ -581,7 +581,7 @@ public class TikaResourceTest extends CXFTestBase {
     @Test
     public void testJsonWriteLimitEmbedded() throws Exception {
         Response response =
-                WebClient.create(endPoint + TIKA_PATH + "/text").accept("application/json")
+                WebClient.create(endPoint + TIKA_PATH + "/html").accept("application/json")
                         .header("writeLimit", "500")
                         .put(ClassLoader.getSystemResourceAsStream(TEST_RECURSIVE_DOC));
         Metadata metadata = JsonMetadata.fromJson(
@@ -594,7 +594,26 @@ public class TikaResourceTest extends CXFTestBase {
         assertTrue(metadata.get(TikaCoreProperties.CONTAINER_EXCEPTION)
                 .startsWith("org.apache.tika.exception.WriteLimitReachedException"));
         assertNotFound("embed4.txt", metadata.get(TikaCoreProperties.TIKA_CONTENT));
+    }
 
+    @Test
+    public void testJsonNoThrowWriteLimitEmbedded() throws Exception {
+        Response response =
+                WebClient.create(endPoint + TIKA_PATH + "/html").accept("application/json")
+                        .header("writeLimit", "500")
+                        .header("throwOnWriteLimitReached", "false")
+                        .put(ClassLoader.getSystemResourceAsStream(TEST_RECURSIVE_DOC));
+        Metadata metadata = JsonMetadata.fromJson(
+                new InputStreamReader(((InputStream) response.getEntity()),
+                        StandardCharsets.UTF_8));
+        String txt = metadata.get(TikaCoreProperties.TIKA_CONTENT);
+        assertContains("embed2a.txt", txt);
+        assertContains("When in the Course", txt);
+        assertNotFound("declare the causes", txt); // this later text must be absent
+        assertEquals("Microsoft Office Word", metadata.get(OfficeOpenXMLExtended.APPLICATION));
+        assertEquals("true", metadata.get(TikaCoreProperties.WRITE_LIMIT_REACHED));
+        assertContains("<div class=\"embedded\" id=\"embed4.txt", // the embed4.txt entry is still emitted
+                metadata.get(TikaCoreProperties.TIKA_CONTENT));
     }
 
     @Test