You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2018/06/07 19:51:00 UTC

[tika] branch branch_1x updated: TIKA-2659 -- add parameters for max files processed in forkclient, and improve some of the offline smoke testing infrastructure.

This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch branch_1x
in repository https://gitbox.apache.org/repos/asf/tika.git


The following commit(s) were added to refs/heads/branch_1x by this push:
     new f2b0e5a  TIKA-2659 -- add parameters for max files processed in forkclient, and improve some of the offline smoke testing infrastructure.
f2b0e5a is described below

commit f2b0e5a9529833736e954b874cd2278dc945a613
Author: tballison <ta...@mitre.org>
AuthorDate: Thu Jun 7 11:32:33 2018 -0400

    TIKA-2659 -- add parameters for max files processed in forkclient, and
    improve some of the offline smoke testing infrastructure.
---
 .../main/java/org/apache/tika/fork/ForkClient.java | 18 ++++-
 .../main/java/org/apache/tika/fork/ForkParser.java | 26 ++++++-
 .../org/apache/tika/MultiThreadedTikaTest.java     | 81 +++++++++++-----------
 .../tika/detect/TestContainerAwareDetector.java    | 15 ++++
 .../apache/tika/parser/chm/TestChmExtraction.java  |  6 +-
 .../parser/fork/ForkParserIntegrationTest.java     | 46 +++++++++++-
 .../resources/test-documents/mock/system_exit.xml  | 25 +++++++
 .../test-documents/mock/thread_interrupt.xml       | 25 +++++++
 8 files changed, 196 insertions(+), 46 deletions(-)

diff --git a/tika-core/src/main/java/org/apache/tika/fork/ForkClient.java b/tika-core/src/main/java/org/apache/tika/fork/ForkClient.java
index f599cb6..5090084 100644
--- a/tika-core/src/main/java/org/apache/tika/fork/ForkClient.java
+++ b/tika-core/src/main/java/org/apache/tika/fork/ForkClient.java
@@ -28,6 +28,7 @@ import java.io.NotSerializableException;
 import java.nio.file.Path;
 import java.util.ArrayList;
 import java.util.List;
+import java.util.concurrent.atomic.AtomicInteger;
 import java.util.jar.JarEntry;
 import java.util.jar.JarOutputStream;
 import java.util.zip.ZipEntry;
@@ -40,6 +41,7 @@ import org.apache.tika.utils.ProcessUtils;
 import org.xml.sax.ContentHandler;
 
 class ForkClient {
+    private static AtomicInteger CLIENT_COUNTER = new AtomicInteger(0);
 
     private final List<ForkResource> resources = new ArrayList<>();
 
@@ -53,6 +55,11 @@ class ForkClient {
 
     private final DataInputStream input;
 
+    //this is used for debugging/smoke testing
+    private final int id = CLIENT_COUNTER.incrementAndGet();
+
+    private volatile int filesProcessed = 0;
+
     public ForkClient(Path tikaDir, ParserFactoryFactory parserFactoryFactory, List<String> java,
                       TimeoutLimits timeoutLimits) throws IOException, TikaException {
         this(tikaDir, parserFactoryFactory, null, java, timeoutLimits);
@@ -192,6 +199,7 @@ class ForkClient {
 
     public synchronized Throwable call(String method, Object... args)
             throws IOException, TikaException {
+        filesProcessed++;
         List<ForkResource> r = new ArrayList<>(resources);
         output.writeByte(ForkServer.CALL);
         output.writeUTF(method);
@@ -201,6 +209,10 @@ class ForkClient {
         return waitForResponse(r);
     }
 
+    public int getFilesProcessed() {
+        return filesProcessed;
+    }
+
     /**
      * Serializes the object first into an in-memory buffer and then
      * writes it to the output stream with a preceding size integer.
@@ -228,7 +240,7 @@ class ForkClient {
         }
 
         try {
-           ForkObjectInputStream.sendObject(object, output);
+            ForkObjectInputStream.sendObject(object, output);
         } catch(NotSerializableException nse) {
            // Build a more friendly error message for this
            throw new TikaException(
@@ -342,4 +354,8 @@ class ForkClient {
             }
         }
     }
+
+    public int getId() {
+        return id;
+    }
 }
diff --git a/tika-core/src/main/java/org/apache/tika/fork/ForkParser.java b/tika-core/src/main/java/org/apache/tika/fork/ForkParser.java
index da0b573..c7b51ac 100644
--- a/tika-core/src/main/java/org/apache/tika/fork/ForkParser.java
+++ b/tika-core/src/main/java/org/apache/tika/fork/ForkParser.java
@@ -67,11 +67,16 @@ public class ForkParser extends AbstractParser {
 
     @Field
     private long serverPulseMillis = 1000;
+
     @Field
     private long serverParseTimeoutMillis = 60000;
+
     @Field
     private long serverWaitTimeoutMillis = 60000;
 
+    @Field
+    private int maxFilesProcessedPerClient = -1;
+
     /**
      * If you have a directory with, say, tike-app.jar and you want the child process/server to build a parser
      * and run it from that -- so that you can keep all of those dependencies out of your client code, use
@@ -337,13 +342,16 @@ public class ForkParser extends AbstractParser {
             //TODO: make this more useful
             throw new IllegalStateException("Unexpected combination of state items");
         }
-
     }
 
     private synchronized void releaseClient(ForkClient client, boolean alive) {
         currentlyInUse--;
         if (currentlyInUse + pool.size() < poolSize && alive) {
-            pool.offer(client);
+            if (maxFilesProcessedPerClient > 0 && client.getFilesProcessed() >= maxFilesProcessedPerClient) {
+                client.close();
+            } else {
+                pool.offer(client);
+            }
             notifyAll();
         } else {
             client.close();
@@ -383,4 +391,18 @@ public class ForkParser extends AbstractParser {
         this.serverWaitTimeoutMillis = serverWaitTimeoutMillis;
     }
 
+    /**
+     * If there is a slowly building memory leak in one of the parsers,
+     * it is useful to set a limit on the number of files processed
+     * by a server before it is shut down and restarted. Default value is -1.
+     *
+     * @param maxFilesProcessedPerClient maximum number of files that a server can handle
+     *                                 before the parser shuts down a client and creates
+     *                                 a new process. If set to -1 (or any value below 1), the server
+     *                                 is never restarted because of the number of files handled.
+     */
+    public void setMaxFilesProcessedPerServer(int maxFilesProcessedPerClient) {
+        this.maxFilesProcessedPerClient = maxFilesProcessedPerClient;
+    }
+
 }
diff --git a/tika-core/src/test/java/org/apache/tika/MultiThreadedTikaTest.java b/tika-core/src/test/java/org/apache/tika/MultiThreadedTikaTest.java
index 3d5094b..9450f24 100644
--- a/tika-core/src/test/java/org/apache/tika/MultiThreadedTikaTest.java
+++ b/tika-core/src/test/java/org/apache/tika/MultiThreadedTikaTest.java
@@ -23,7 +23,6 @@ import org.apache.tika.exception.TikaException;
 import org.apache.tika.io.TikaInputStream;
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.mime.MediaType;
-import org.apache.tika.parser.AutoDetectParser;
 import org.apache.tika.parser.ParseContext;
 import org.apache.tika.parser.Parser;
 import org.apache.tika.parser.RecursiveParserWrapper;
@@ -32,7 +31,6 @@ import org.apache.tika.sax.BasicContentHandlerFactory;
 import org.apache.tika.sax.RecursiveParserWrapperHandler;
 import org.apache.tika.utils.XMLReaderUtils;
 
-import javax.xml.namespace.QName;
 import java.io.FileFilter;
 import java.io.IOException;
 import java.io.InputStream;
@@ -55,6 +53,7 @@ import java.util.concurrent.ExecutorService;
 import java.util.concurrent.Executors;
 import java.util.concurrent.Future;
 import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicInteger;
 
 import static org.junit.Assert.assertEquals;
 
@@ -64,7 +63,8 @@ public class MultiThreadedTikaTest extends TikaTest {
     XmlRootExtractor ex = new XmlRootExtractor();
 
     /**
-     * This calls {@link #testEach(Path[], ParseContext[], int, int)} and then {@link #testAll(Path[], ParseContext[], int, int)}
+     * This calls {@link #testEach(Parser parser, Path[], ParseContext[], int, int)} and
+     * then {@link #testAll(Parser parser, Path[], ParseContext[], int, int)}
      *
      * @param numThreads    number of threads to use
      * @param numIterations number of iterations per thread
@@ -72,10 +72,10 @@ public class MultiThreadedTikaTest extends TikaTest {
      *                      all files will be used
      * @throws Exception
      */
-    protected void testMultiThreaded(ParseContext[] parseContext, int numThreads, int numIterations, FileFilter filter) throws Exception {
+    protected void testMultiThreaded(Parser parser, ParseContext[] parseContext, int numThreads, int numIterations, FileFilter filter) throws Exception {
         Path[] allFiles = getTestFiles(filter);
-        testEach(allFiles, parseContext, numThreads, numIterations);
-        testAll(allFiles, parseContext, numThreads, numIterations);
+        testEach(parser, allFiles, parseContext, numThreads, numIterations);
+        testAll(parser, allFiles, parseContext, numThreads, numIterations);
     }
 
     public void testDetector(Detector detector, int numThreads, int numIterations, FileFilter filter, int randomlyResizeSAXPool) throws Exception {
@@ -156,11 +156,11 @@ public class MultiThreadedTikaTest extends TikaTest {
      * @param numThreads    number of threads to use
      * @param numIterations number of iterations per thread
      */
-    protected void testEach(Path[] files, ParseContext[] parseContext, int numThreads, int numIterations) {
+    protected void testEach(Parser parser, Path[] files, ParseContext[] parseContext, int numThreads, int numIterations) {
         for (Path p : files) {
             Path[] toTest = new Path[1];
             toTest[0] = p;
-            testAll(toTest, parseContext, numThreads, numIterations);
+            testAll(parser, toTest, parseContext, numThreads, numIterations);
         }
     }
 
@@ -177,12 +177,12 @@ public class MultiThreadedTikaTest extends TikaTest {
      * @param numThreads    number of parser threads
      * @param numIterations number of iterations per parser
      */
-    protected void testAll(Path[] files, ParseContext[] parseContext, int numThreads, int numIterations) {
+    protected void testAll(Parser parser, Path[] files, ParseContext[] parseContext, int numThreads, int numIterations) {
 
-        Map<Path, Extract> truth = getBaseline(files, parseContext[0]);
+        Map<Path, Extract> truth = getBaseline(parser, files, parseContext[0]);
         //if all files caused an exception
         if (truth.size() == 0) {
-            return;
+            //return;
         }
         //only those that parsed without exception
         Path[] testFiles = new Path[truth.size()];
@@ -193,23 +193,21 @@ public class MultiThreadedTikaTest extends TikaTest {
 
         ExecutorService ex = Executors.newFixedThreadPool(numThreads);
         try {
-            _testAll(testFiles, parseContext, numThreads, numIterations, truth, ex);
+            _testAll(parser, files, parseContext, numThreads, numIterations, truth, ex);
         } finally {
             ex.shutdown();
             ex.shutdownNow();
         }
     }
 
-    private void _testAll(Path[] testFiles, ParseContext[] parseContext, int numThreads, int numIterations,
+    private void _testAll(Parser parser, Path[] testFiles, ParseContext[] parseContext, int numThreads, int numIterations,
                           Map<Path, Extract> truth, ExecutorService ex) {
 
         ExecutorCompletionService<Integer> executorCompletionService = new ExecutorCompletionService<>(ex);
 
         //use the same parser in all threads
-        Parser parser = new AutoDetectParser();
-        RecursiveParserWrapper wrapper = new RecursiveParserWrapper(parser);
         for (int i = 0; i < numThreads; i++) {
-            executorCompletionService.submit(new TikaRunner(wrapper, parseContext[i], numIterations, testFiles, truth));
+            executorCompletionService.submit(new TikaRunner(parser, parseContext[i], numIterations, testFiles, truth));
         }
 
         int completed = 0;
@@ -240,9 +238,7 @@ public class MultiThreadedTikaTest extends TikaTest {
                     return FileVisitResult.CONTINUE;
                 }
                 if (!attrs.isDirectory()) {
-                    if (files.size() < 20) {
-                        files.add(file);
-                    }
+                    files.add(file);
                 }
                 return FileVisitResult.CONTINUE;
             }
@@ -266,22 +262,17 @@ public class MultiThreadedTikaTest extends TikaTest {
         return baseline;
     }
 
-    private static ConcurrentHashMap<Path, Extract> getBaseline(Path[] files, ParseContext parseContext) {
+    private static ConcurrentHashMap<Path, Extract> getBaseline(Parser parser, Path[] files, ParseContext parseContext) {
         ConcurrentHashMap<Path, Extract> baseline = new ConcurrentHashMap<>();
+
         for (Path f : files) {
+            try (TikaInputStream is = TikaInputStream.get(f)) {
 
-            try {
-                Parser p = new AutoDetectParser();
-                RecursiveParserWrapper wrapper = new RecursiveParserWrapper(p);
-                RecursiveParserWrapperHandler handler = new RecursiveParserWrapperHandler(
-                        new BasicContentHandlerFactory(BasicContentHandlerFactory.HANDLER_TYPE.TEXT, -1),
-                        -1);
-                try (TikaInputStream is = TikaInputStream.get(f)) {
-                    wrapper.parse(is, handler, new Metadata(), parseContext);
-                }
-                List<Metadata> metadataList = handler.getMetadataList();
+                List<Metadata> metadataList = getRecursiveMetadata(is, parser, parseContext);
                 baseline.put(f, new Extract(metadataList));
+
             } catch (Exception e) {
+                e.printStackTrace();
                 //swallow
             }
         }
@@ -289,15 +280,15 @@ public class MultiThreadedTikaTest extends TikaTest {
     }
 
     private static List<Metadata> getRecursiveMetadata(InputStream is,
-                                                       RecursiveParserWrapper wrapper, ParseContext parseContext) throws Exception {
+                                                       Parser parser, ParseContext parseContext) throws Exception {
         //different from parent TikaTest in that this extracts text.
         //can't extract xhtml because "tmp" file names wind up in
         //content's metadata and they'll differ by file.
-
+        parseContext = new ParseContext();
         RecursiveParserWrapperHandler handler = new RecursiveParserWrapperHandler(
                 new BasicContentHandlerFactory(BasicContentHandlerFactory.HANDLER_TYPE.TEXT, -1),
                 -1);
-        wrapper.parse(is, handler, new Metadata(), parseContext);
+        parser.parse(is, handler, new Metadata(), parseContext);
         return handler.getMetadataList();
     }
 
@@ -358,20 +349,22 @@ public class MultiThreadedTikaTest extends TikaTest {
 
 
     //TODO: make this return something useful besides an integer
-    private class TikaRunner implements Callable<Integer> {
-        private final RecursiveParserWrapper parser;
+    private static class TikaRunner implements Callable<Integer> {
+        private static AtomicInteger threadCount = new AtomicInteger(0);
+        private final Parser parser;
         private final int iterations;
         private final Path[] files;
         private final Map<Path, Extract> truth;
         private final ParseContext parseContext;
         private final Random random = new Random();
-
-        private TikaRunner(RecursiveParserWrapper parser, ParseContext parseContext, int iterations, Path[] files, Map<Path, Extract> truth) {
+        private final int threadNumber;
+        private TikaRunner(Parser parser, ParseContext parseContext, int iterations, Path[] files, Map<Path, Extract> truth) {
             this.parser = parser;
             this.iterations = iterations;
             this.files = files;
             this.truth = truth;
             this.parseContext = parseContext;
+            threadNumber = threadCount.getAndIncrement();
         }
 
         @Override
@@ -379,11 +372,17 @@ public class MultiThreadedTikaTest extends TikaTest {
             for (int i = 0; i < iterations; i++) {
                 int randIndex = random.nextInt(files.length);
                 Path testFile = files[randIndex];
+                List<Metadata> metadataList = null;
+                boolean success = false;
                 try (InputStream is = Files.newInputStream(testFile)) {
-                    List<Metadata> metadataList = getRecursiveMetadata(is, parser, parseContext);
-                    assertExtractEquals(truth.get(testFile), new Extract(metadataList));
+                    metadataList = getRecursiveMetadata(is, parser, new ParseContext());
+                    success = true;
                 } catch (Exception e) {
-                    throw new RuntimeException(testFile + " triggered this exception", e);
+                    //swallow
+                    //throw new RuntimeException(testFile + " triggered this exception", e);
+                }
+                if (success) {
+                    assertExtractEquals(truth.get(testFile), new Extract(metadataList));
                 }
             }
             return 1;
@@ -391,7 +390,7 @@ public class MultiThreadedTikaTest extends TikaTest {
 
     }
 
-    private void assertExtractEquals(Extract extractA, Extract extractB) {
+    private static void assertExtractEquals(Extract extractA, Extract extractB) {
         //this currently only checks the basics
         //might want to add more checks
 
diff --git a/tika-parsers/src/test/java/org/apache/tika/detect/TestContainerAwareDetector.java b/tika-parsers/src/test/java/org/apache/tika/detect/TestContainerAwareDetector.java
index f7644e8..57b91ca 100644
--- a/tika-parsers/src/test/java/org/apache/tika/detect/TestContainerAwareDetector.java
+++ b/tika-parsers/src/test/java/org/apache/tika/detect/TestContainerAwareDetector.java
@@ -26,6 +26,7 @@ import java.io.FilenameFilter;
 import java.io.IOException;
 import java.io.InputStream;
 import java.nio.file.Path;
+import java.util.Random;
 
 import org.apache.poi.poifs.filesystem.NPOIFSFileSystem;
 import org.apache.tika.MultiThreadedTikaTest;
@@ -467,15 +468,29 @@ public class TestContainerAwareDetector extends MultiThreadedTikaTest {
 
     @Test
     public void testAllMultithreaded() throws Exception {
+
         Detector detector = new Tika().getDetector();
         FileFilter filter = new FileFilter() {
+            //TODO: create proper randomized framework that will record seed, etc...
+            private final Random random = new Random();
+            //increase this to the number of files for a true smoke test
+            //for now, randomly pick 20 files.
+            int toProcess = 20;
+            int processed = 0;
             @Override
             public boolean accept(File pathname) {
+                if (processed >= toProcess) {
+                    return false;
+                } else if (random.nextBoolean()) {
+                    processed++;
                     return true;
+                }
+                return false;
             }
         };
         int numThreads = 20;
         XMLReaderUtils.setPoolSize(numThreads);
+
         testDetector(detector, numThreads, 50, filter, numThreads*3);
     }
 
diff --git a/tika-parsers/src/test/java/org/apache/tika/parser/chm/TestChmExtraction.java b/tika-parsers/src/test/java/org/apache/tika/parser/chm/TestChmExtraction.java
index 3129453..c49a006 100644
--- a/tika-parsers/src/test/java/org/apache/tika/parser/chm/TestChmExtraction.java
+++ b/tika-parsers/src/test/java/org/apache/tika/parser/chm/TestChmExtraction.java
@@ -19,8 +19,10 @@ package org.apache.tika.parser.chm;
 import org.apache.tika.MultiThreadedTikaTest;
 import org.apache.tika.exception.TikaException;
 import org.apache.tika.metadata.Metadata;
+import org.apache.tika.parser.AutoDetectParser;
 import org.apache.tika.parser.ParseContext;
 import org.apache.tika.parser.Parser;
+import org.apache.tika.parser.RecursiveParserWrapper;
 import org.apache.tika.parser.chm.accessor.ChmDirectoryListingSet;
 import org.apache.tika.parser.chm.accessor.DirectoryListingEntry;
 import org.apache.tika.parser.chm.core.ChmExtractor;
@@ -218,7 +220,9 @@ public class TestChmExtraction extends MultiThreadedTikaTest {
         for (int i = 0; i < parseContexts.length; i++) {
             parseContexts[i] = new ParseContext();
         }
-        testMultiThreaded(parseContexts, 10, 10, new FileFilter() {
+        Parser p = new AutoDetectParser();
+        RecursiveParserWrapper wrapper = new RecursiveParserWrapper(p);
+        testMultiThreaded(wrapper, parseContexts, 10, 10, new FileFilter() {
                     @Override
                     public boolean accept(File pathname) {
                         if (pathname.getName().toLowerCase(Locale.ENGLISH).endsWith(".chm")) {
diff --git a/tika-parsers/src/test/java/org/apache/tika/parser/fork/ForkParserIntegrationTest.java b/tika-parsers/src/test/java/org/apache/tika/parser/fork/ForkParserIntegrationTest.java
index 4baf634..5ed4788 100644
--- a/tika-parsers/src/test/java/org/apache/tika/parser/fork/ForkParserIntegrationTest.java
+++ b/tika-parsers/src/test/java/org/apache/tika/parser/fork/ForkParserIntegrationTest.java
@@ -22,6 +22,8 @@ import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertNotNull;
 import static org.junit.Assert.fail;
 
+import java.io.File;
+import java.io.FileFilter;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.NotSerializableException;
@@ -29,6 +31,8 @@ import java.util.Arrays;
 import java.util.HashSet;
 import java.util.Set;
 
+import org.apache.commons.io.filefilter.TrueFileFilter;
+import org.apache.tika.MultiThreadedTikaTest;
 import org.apache.tika.Tika;
 import org.apache.tika.TikaTest;
 import org.apache.tika.detect.Detector;
@@ -39,7 +43,9 @@ import org.apache.tika.mime.MediaType;
 import org.apache.tika.parser.EmptyParser;
 import org.apache.tika.parser.ParseContext;
 import org.apache.tika.parser.Parser;
+import org.apache.tika.parser.RecursiveParserWrapper;
 import org.apache.tika.sax.BodyContentHandler;
+import org.junit.Ignore;
 import org.junit.Test;
 import org.xml.sax.ContentHandler;
 import org.xml.sax.SAXException;
@@ -48,7 +54,7 @@ import org.xml.sax.SAXException;
  * Test that the ForkParser correctly behaves when
  *  wired in to the regular Parsers and their test data
  */
-public class ForkParserIntegrationTest extends TikaTest {
+public class ForkParserIntegrationTest extends MultiThreadedTikaTest {
 
     private Tika tika = new Tika(); // TODO Use TikaConfig instead, when it works
 
@@ -287,4 +293,42 @@ public class ForkParserIntegrationTest extends TikaTest {
             parser.close();
         }
     }
+
+    @Test
+    @Ignore("use for development/one off testing.  This is a beast and takes enormous resources and time")
+    public void smokeTest() throws Exception {
+        RecursiveParserWrapper wrapper = new RecursiveParserWrapper(tika.getParser());
+        int numThreads = 5;
+        ForkParser parser = new ForkParser(ForkParserIntegrationTest.class.getClassLoader(),
+                wrapper);
+        parser.setServerPulseMillis(500);
+        parser.setServerParseTimeoutMillis(1000);
+        parser.setPoolSize(numThreads);
+        ParseContext[] parseContexts = new ParseContext[numThreads];
+        for (int i = 0; i < numThreads; i++) {
+            parseContexts[i] = new ParseContext();
+        }
+        try {
+            super.testMultiThreaded(parser, parseContexts, numThreads, 5,
+                    new FileFilter() {
+                        @Override
+                        public boolean accept(File pathname) {
+                            if (pathname.getAbsolutePath().contains("mock")) {
+                                return true;
+                            } else {
+                                return false;
+                            }/*
+                            if (pathname.getName().contains("11_hang.rar") ||
+                                    pathname.getName().contains("radar_profiles_2009.mat") ||
+                                    pathname.getAbsolutePath().contains("mock")) {
+                                //return false;
+                            }
+                            return true;*/
+                        }
+                    });
+        } catch (Throwable t) {
+            t.printStackTrace();
+        }
+    }
+
 }
diff --git a/tika-parsers/src/test/resources/test-documents/mock/system_exit.xml b/tika-parsers/src/test/resources/test-documents/mock/system_exit.xml
new file mode 100644
index 0000000..75d1d3b
--- /dev/null
+++ b/tika-parsers/src/test/resources/test-documents/mock/system_exit.xml
@@ -0,0 +1,25 @@
+<?xml version="1.0" encoding="UTF-8" ?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements.  See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership.  The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing,
+  software distributed under the License is distributed on an
+  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  KIND, either express or implied.  See the License for the
+  specific language governing permissions and limitations
+  under the License.
+-->
+
+<mock>
+    <metadata action="add" name="author">Nikolai Lobachevsky</metadata>
+    <write element="p">some content</write>
+    <system_exit />
+</mock>
\ No newline at end of file
diff --git a/tika-parsers/src/test/resources/test-documents/mock/thread_interrupt.xml b/tika-parsers/src/test/resources/test-documents/mock/thread_interrupt.xml
new file mode 100644
index 0000000..3e54512
--- /dev/null
+++ b/tika-parsers/src/test/resources/test-documents/mock/thread_interrupt.xml
@@ -0,0 +1,25 @@
+<?xml version="1.0" encoding="UTF-8" ?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements.  See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership.  The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing,
+  software distributed under the License is distributed on an
+  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  KIND, either express or implied.  See the License for the
+  specific language governing permissions and limitations
+  under the License.
+-->
+
+<mock>
+    <metadata action="add" name="author">Nikolai Lobachevsky</metadata>
+    <write element="p">some content</write>
+    <thread_interrupt />
+</mock>
\ No newline at end of file

-- 
To stop receiving notification emails like this one, please contact
tallison@apache.org.