You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2018/06/01 12:51:13 UTC

[tika] branch master updated: TIKA-2656 -- allow absolute timeout for ForkParser

This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/tika.git


The following commit(s) were added to refs/heads/master by this push:
     new 929dcd7  TIKA-2656 -- allow absolute timeout for ForkParser
929dcd7 is described below

commit 929dcd75b8480a2a2d19a1801a88973b5e9358ac
Author: tballison <ta...@mitre.org>
AuthorDate: Fri Jun 1 08:50:47 2018 -0400

    TIKA-2656 -- allow absolute timeout for ForkParser
---
 CHANGES.txt                                        |   3 +
 .../main/java/org/apache/tika/fork/ForkClient.java |  79 ++++-----------
 .../main/java/org/apache/tika/fork/ForkParser.java |  42 ++++++--
 .../main/java/org/apache/tika/fork/ForkServer.java | 110 ++++++++++++---------
 .../java/org/apache/tika/fork/TimeoutLimits.java   |  43 ++++++++
 .../java/org/apache/tika/fork/ForkParserTest.java  |  20 ++--
 .../apache/tika/fork/ForkParserTikaBinTest.java    |  25 +++--
 .../fork => test-documents}/basic_embedded.xml     |   0
 .../fork => test-documents}/embedded_then_npe.xml  |   0
 .../fork => test-documents}/embedded_with_npe.xml  |   0
 10 files changed, 189 insertions(+), 133 deletions(-)

diff --git a/CHANGES.txt b/CHANGES.txt
index 891f852..c6503f3 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -7,6 +7,9 @@ Release 2.0.0 - ???
 
 Release 1.19 ???
 
+   * Add absolute timeout to ForkParser rather than testing
+     for active (TIKA-2656).
+
    * Make the RecursiveParserWrapper work with the ForkParser (TIKA-2655).
 
    * Allow the ForkParser to specify a directory containing tika-app.jar
diff --git a/tika-core/src/main/java/org/apache/tika/fork/ForkClient.java b/tika-core/src/main/java/org/apache/tika/fork/ForkClient.java
index fc86784..a79de48 100644
--- a/tika-core/src/main/java/org/apache/tika/fork/ForkClient.java
+++ b/tika-core/src/main/java/org/apache/tika/fork/ForkClient.java
@@ -53,65 +53,21 @@ class ForkClient {
 
     private final DataInputStream input;
 
-    private final InputStream error;
-
-    public ForkClient(Path tikaDir, ParserFactoryFactory parserFactoryFactory, List<String> java, long serverPulseMillis) throws IOException, TikaException {
-        jar = null;
-        loader = null;
-        boolean ok = false;
-        ProcessBuilder builder = new ProcessBuilder();
-        List<String> command = new ArrayList<>();
-        command.addAll(java);
-        command.add("-cp");
-        String dirString = tikaDir.toAbsolutePath().toString();
-        if (!dirString.endsWith("/")) {
-            dirString += "/*";
-        } else {
-            dirString += "/";
-        }
-        dirString = ProcessUtils.escapeCommandLine(dirString);
-        command.add(dirString);
-        command.add("org.apache.tika.fork.ForkServer");
-        command.add(Long.toString(serverPulseMillis));
-        builder.command(command);
-        builder.redirectError(ProcessBuilder.Redirect.INHERIT);
-        try {
-            this.process = builder.start();
-
-            this.output = new DataOutputStream(process.getOutputStream());
-            this.input = new DataInputStream(process.getInputStream());
-            this.error = process.getErrorStream();
-
-            waitForStartBeacon();
-            output.writeByte(ForkServer.INIT_PARSER_FACTORY_FACTORY);
-            output.flush();
-            sendObject(parserFactoryFactory, resources);
-
-            waitForStartBeacon();
-
-            ok = true;
-        } catch (Throwable t) {
-            t.printStackTrace();
-            throw t;
-        } finally {
-            if (!ok) {
-                close();
-            }
-        }
+    public ForkClient(Path tikaDir, ParserFactoryFactory parserFactoryFactory, List<String> java,
+                      TimeoutLimits timeoutLimits) throws IOException, TikaException {
+        this(tikaDir, parserFactoryFactory, null, java, timeoutLimits);
     }
-
     /**
      *
      * @param tikaDir directory containing jars from which to start the child server and load the Parser
      * @param parserFactoryFactory factory to send to child process to build parser upon arrival
      * @param classLoader class loader to use for non-parser resource (content-handler, etc.)
      * @param java java commandline to use for the commandline server
-     * @param serverPulseMillis how often to check if the server has been active
      * @throws IOException
      * @throws TikaException
      */
     public ForkClient(Path tikaDir, ParserFactoryFactory parserFactoryFactory, ClassLoader classLoader,
-                      List<String> java, long serverPulseMillis) throws IOException, TikaException {
+                      List<String> java, TimeoutLimits timeoutLimits) throws IOException, TikaException {
         jar = null;
         loader = null;
         boolean ok = false;
@@ -128,7 +84,9 @@ class ForkClient {
         dirString = ProcessUtils.escapeCommandLine(dirString);
         command.add(dirString);
         command.add("org.apache.tika.fork.ForkServer");
-        command.add(Long.toString(serverPulseMillis));
+        command.add(Long.toString(timeoutLimits.getPulseMS()));
+        command.add(Long.toString(timeoutLimits.getParseTimeoutMS()));
+        command.add(Long.toString(timeoutLimits.getWaitTimeoutMS()));
         builder.command(command);
         builder.redirectError(ProcessBuilder.Redirect.INHERIT);
         try {
@@ -136,13 +94,18 @@ class ForkClient {
 
             this.output = new DataOutputStream(process.getOutputStream());
             this.input = new DataInputStream(process.getInputStream());
-            this.error = process.getErrorStream();
 
             waitForStartBeacon();
-            output.writeByte(ForkServer.INIT_PARSER_FACTORY_FACTORY_LOADER);
+            if (classLoader != null) {
+                output.writeByte(ForkServer.INIT_PARSER_FACTORY_FACTORY_LOADER);
+            } else {
+                output.writeByte(ForkServer.INIT_PARSER_FACTORY_FACTORY);
+            }
             output.flush();
             sendObject(parserFactoryFactory, resources);
-            sendObject(classLoader, resources);
+            if (classLoader != null) {
+                sendObject(classLoader, resources);
+            }
             waitForStartBeacon();
             ok = true;
         } catch (Throwable t) {
@@ -156,7 +119,7 @@ class ForkClient {
     }
 
 
-    public ForkClient(ClassLoader loader, Object object, List<String> java, long serverPulseMillis)
+    public ForkClient(ClassLoader loader, Object object, List<String> java, TimeoutLimits timeoutLimits)
             throws IOException, TikaException {
         boolean ok = false;
         try {
@@ -168,14 +131,15 @@ class ForkClient {
             command.addAll(java);
             command.add("-jar");
             command.add(jar.getPath());
-            command.add(Long.toString(serverPulseMillis));
+            command.add(Long.toString(timeoutLimits.getPulseMS()));
+            command.add(Long.toString(timeoutLimits.getParseTimeoutMS()));
+            command.add(Long.toString(timeoutLimits.getWaitTimeoutMS()));
             builder.command(command);
             builder.redirectError(ProcessBuilder.Redirect.INHERIT);
             this.process = builder.start();
 
             this.output = new DataOutputStream(process.getOutputStream());
             this.input = new DataInputStream(process.getInputStream());
-            this.error = process.getErrorStream();
 
             waitForStartBeacon();
             output.writeByte(ForkServer.INIT_LOADER_PARSER);
@@ -283,13 +247,10 @@ class ForkClient {
             if (input != null) {
                 input.close();
             }
-            if (error != null) {
-                error.close();
-            }
         } catch (IOException ignore) {
         }
         if (process != null) {
-            process.destroy();
+            process.destroyForcibly();
             try {
                 //TIKA-1933
                 process.waitFor();
diff --git a/tika-core/src/main/java/org/apache/tika/fork/ForkParser.java b/tika-core/src/main/java/org/apache/tika/fork/ForkParser.java
index 3df9826..da0b573 100644
--- a/tika-core/src/main/java/org/apache/tika/fork/ForkParser.java
+++ b/tika-core/src/main/java/org/apache/tika/fork/ForkParser.java
@@ -27,6 +27,7 @@ import java.util.List;
 import java.util.Queue;
 import java.util.Set;
 
+import org.apache.tika.config.Field;
 import org.apache.tika.exception.TikaException;
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.mime.MediaType;
@@ -57,13 +58,19 @@ public class ForkParser extends AbstractParser {
     private List<String> java = Arrays.asList("java", "-Xmx32m");
 
     /** Process pool size */
+    @Field
     private int poolSize = 5;
 
     private int currentlyInUse = 0;
 
     private final Queue<ForkClient> pool = new LinkedList<>();
 
-    private long serverPulseMillis = 5000;
+    @Field
+    private long serverPulseMillis = 1000;
+    @Field
+    private long serverParseTimeoutMillis = 60000;
+    @Field
+    private long serverWaitTimeoutMillis = 60000;
 
     /**
      * If you have a directory with, say, tika-app.jar and you want the child process/server to build a parser
@@ -319,13 +326,13 @@ public class ForkParser extends AbstractParser {
     }
 
     private ForkClient newClient() throws IOException, TikaException {
-
+        TimeoutLimits timeoutLimits = new TimeoutLimits(serverPulseMillis, serverParseTimeoutMillis, serverWaitTimeoutMillis);
         if (loader == null && parser == null && tikaBin != null && parserFactoryFactory != null) {
-            return new ForkClient(tikaBin, parserFactoryFactory, java, serverPulseMillis);
+            return new ForkClient(tikaBin, parserFactoryFactory, java, timeoutLimits);
         } else if (loader != null && parser != null && tikaBin == null && parserFactoryFactory == null) {
-           return new ForkClient(loader, parser, java, serverPulseMillis);
+           return new ForkClient(loader, parser, java, timeoutLimits);
         } else if (loader != null && parser == null && tikaBin != null && parserFactoryFactory != null) {
-            return new ForkClient(tikaBin, parserFactoryFactory, loader, java, serverPulseMillis);
+            return new ForkClient(tikaBin, parserFactoryFactory, loader, java, timeoutLimits);
         } else {
             //TODO: make this more useful
             throw new IllegalStateException("Unexpected combination of state items");
@@ -345,8 +352,8 @@ public class ForkParser extends AbstractParser {
 
     /**
      * The amount of time in milliseconds that the server
-     * should wait for any input or output.  If it receives no
-     * input or output in this amount of time, it will shutdown.
+     * should wait before checking to see if the parse has timed out
+     * or if the wait has timed out
      * The default is 1 second.
      *
      * @param serverPulseMillis milliseconds to sleep before checking if there has been any activity
@@ -355,4 +362,25 @@ public class ForkParser extends AbstractParser {
         this.serverPulseMillis = serverPulseMillis;
     }
 
+    /**
+     * The maximum amount of time allowed for the server to try to parse a file.
+     * If more than this time elapses, the server shuts down, and the ForkParser
+     * throws an exception.
+     *
+     * @param serverParseTimeoutMillis
+     */
+    public void setServerParseTimeoutMillis(long serverParseTimeoutMillis) {
+        this.serverParseTimeoutMillis = serverParseTimeoutMillis;
+    }
+
+    /**
+     * The maximum amount of time allowed for the server to wait for a new request to parse
+     * a file.  The server will shutdown after this amount of time, and a new server will have
+     * to be started by a new client.
+     * @param serverWaitTimeoutMillis
+     */
+    public void setServerWaitTimeoutMillis(long serverWaitTimeoutMillis) {
+        this.serverWaitTimeoutMillis = serverWaitTimeoutMillis;
+    }
+
 }
diff --git a/tika-core/src/main/java/org/apache/tika/fork/ForkServer.java b/tika-core/src/main/java/org/apache/tika/fork/ForkServer.java
index f4c49da..18345db 100644
--- a/tika-core/src/main/java/org/apache/tika/fork/ForkServer.java
+++ b/tika-core/src/main/java/org/apache/tika/fork/ForkServer.java
@@ -27,6 +27,7 @@ import java.io.IOException;
 import java.io.InputStream;
 import java.io.NotSerializableException;
 import java.io.OutputStream;
+import java.io.Serializable;
 import java.lang.reflect.InvocationTargetException;
 import java.lang.reflect.Method;
 import java.net.URL;
@@ -34,7 +35,7 @@ import java.util.zip.CheckedInputStream;
 import java.util.zip.CheckedOutputStream;
 import java.util.zip.Checksum;
 
-class ForkServer implements Runnable, Checksum {
+class ForkServer implements Runnable {
 
     public static final byte ERROR = -1;
 
@@ -57,6 +58,11 @@ class ForkServer implements Runnable, Checksum {
     //milliseconds to sleep before checking to see if there has been any reading/writing
     //If no reading or writing in this time, shutdown the server.
     private long serverPulseMillis = 5000;
+    private long serverParserTimeoutMillis = 60000;
+    private long serverWaitTimeoutMillis = 60000;
+
+    private Object[] lock = new Object[0];
+
     /**
      * Starts a forked server process using the standard input and output
      * streams for communication with the parent process. Any attempts by
@@ -67,13 +73,14 @@ class ForkServer implements Runnable, Checksum {
      * @throws Exception if the server could not be started
      */
     public static void main(String[] args) throws Exception {
-        long serverPulseMillis = -1;
-        if (args.length > 0) {
-            serverPulseMillis = Long.parseLong(args[0]);
-        }
+        long serverPulseMillis = Long.parseLong(args[0]);
+        long serverParseTimeoutMillis = Long.parseLong(args[1]);
+        long serverWaitTimeoutMillis = Long.parseLong(args[2]);
+
         URL.setURLStreamHandlerFactory(new MemoryURLStreamHandlerFactory());
 
-        ForkServer server = new ForkServer(System.in, System.out, serverPulseMillis);
+        ForkServer server = new ForkServer(System.in, System.out,
+                serverPulseMillis, serverParseTimeoutMillis, serverWaitTimeoutMillis);
         System.setIn(new ByteArrayInputStream(new byte[0]));
         System.setOut(System.err);
 
@@ -97,6 +104,9 @@ class ForkServer implements Runnable, Checksum {
     private Object parser;
     private ClassLoader classLoader;
 
+    private boolean parsing = false;
+    private long since;
+
     /**
      * Sets up a forked server instance using the given stdin/out
      * communication channel.
@@ -105,19 +115,31 @@ class ForkServer implements Runnable, Checksum {
      * @param output output stream for writing to the parent process
      * @throws IOException if the server instance could not be created
      */
-    public ForkServer(InputStream input, OutputStream output, long serverPulseMillis)
+    public ForkServer(InputStream input, OutputStream output,
+                      long serverPulseMillis, long serverParserTimeoutMillis, long serverWaitTimeoutMillis)
             throws IOException {
         this.input =
-            new DataInputStream(new CheckedInputStream(input, this));
+            new DataInputStream(input);
         this.output =
-            new DataOutputStream(new CheckedOutputStream(output, this));
+            new DataOutputStream(output);
         this.serverPulseMillis = serverPulseMillis;
+        this.serverParserTimeoutMillis = serverParserTimeoutMillis;
+        this.serverWaitTimeoutMillis = serverWaitTimeoutMillis;
+        this.parsing = false;
+        this.since = System.currentTimeMillis();
     }
 
     public void run() {
         try {
-            while (active) {
-                active = false;
+            while (true) {
+                synchronized (lock) {
+                    long elapsed = System.currentTimeMillis()-since;
+                    if (parsing && elapsed > serverParserTimeoutMillis) {
+                        break;
+                    } else if (!parsing && serverWaitTimeoutMillis > 0 && elapsed > serverWaitTimeoutMillis) {
+                        break;
+                    }
+                }
                 Thread.sleep(serverPulseMillis);
             }
             System.exit(0);
@@ -126,6 +148,7 @@ class ForkServer implements Runnable, Checksum {
     }
 
     public void processRequests() {
+        //initialize
         try {
             initializeParserAndLoader();
         } catch (Throwable t) {
@@ -140,6 +163,7 @@ class ForkServer implements Runnable, Checksum {
             }
             return;
         }
+        //main loop
         try {
             while (true) {
                 int request = input.read();
@@ -210,26 +234,38 @@ class ForkServer implements Runnable, Checksum {
     }
 
     private void call(ClassLoader loader, Object object) throws Exception {
-        Method method = getMethod(object, input.readUTF());
-        Object[] args =
-            new Object[method.getParameterTypes().length];
-        for (int i = 0; i < args.length; i++) {
-            args[i] = readObject(loader);
+        synchronized (lock) {
+            parsing = true;
+            since = System.currentTimeMillis();
         }
         try {
-            method.invoke(object, args);
-            output.write(DONE);
-        } catch (InvocationTargetException e) {
-            output.write(ERROR);
-            // Try to send the underlying Exception itself
-            Throwable toSend = e.getCause();
+            Method method = getMethod(object, input.readUTF());
+            Object[] args =
+                    new Object[method.getParameterTypes().length];
+            for (int i = 0; i < args.length; i++) {
+                args[i] = readObject(loader);
+            }
             try {
-               ForkObjectInputStream.sendObject(toSend, output);
-            } catch (NotSerializableException nse) {
-               // Need to build a serializable version of it
-               TikaException te = new TikaException( toSend.getMessage() );
-               te.setStackTrace( toSend.getStackTrace() );
-               ForkObjectInputStream.sendObject(te, output);
+                method.invoke(object, args);
+                output.write(DONE);
+            } catch (InvocationTargetException e) {
+                output.write(ERROR);
+                // Try to send the underlying Exception itself
+                Throwable toSend = e.getCause();
+                try {
+                    ForkObjectInputStream.sendObject(toSend, output);
+                } catch (NotSerializableException nse) {
+                    // Need to build a serializable version of it
+                    TikaException te = new TikaException(toSend.getMessage());
+                    te.setStackTrace(toSend.getStackTrace());
+                    ForkObjectInputStream.sendObject(te, output);
+                }
+
+            }
+        } finally {
+            synchronized (lock) {
+                parsing = false;
+                since = System.currentTimeMillis();
             }
         }
     }
@@ -271,22 +307,4 @@ class ForkServer implements Runnable, Checksum {
 
         return object;
     }
-
-    //------------------------------------------------------------< Checksum >
-
-    public void update(int b) {
-        active = true;
-    }
-
-    public void update(byte[] b, int off, int len) {
-        active = true;
-    }
-
-    public long getValue() {
-        return 0;
-    }
-
-    public void reset() {
-    }
-
 }
diff --git a/tika-core/src/main/java/org/apache/tika/fork/TimeoutLimits.java b/tika-core/src/main/java/org/apache/tika/fork/TimeoutLimits.java
new file mode 100644
index 0000000..6610437
--- /dev/null
+++ b/tika-core/src/main/java/org/apache/tika/fork/TimeoutLimits.java
@@ -0,0 +1,43 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.fork;
+
+class TimeoutLimits {
+
+    private final long pulseMS;
+    private final long parseTimeoutMS;
+    private final long waitTimeoutMS;
+
+
+    TimeoutLimits(long pulseMS, long parseTimeoutMS, long waitTimeoutMS) {
+        this.pulseMS = pulseMS;
+        this.parseTimeoutMS = parseTimeoutMS;
+        this.waitTimeoutMS = waitTimeoutMS;
+    }
+
+    public long getPulseMS() {
+        return pulseMS;
+    }
+
+    public long getParseTimeoutMS() {
+        return parseTimeoutMS;
+    }
+
+    public long getWaitTimeoutMS() {
+        return waitTimeoutMS;
+    }
+}
diff --git a/tika-core/src/test/java/org/apache/tika/fork/ForkParserTest.java b/tika-core/src/test/java/org/apache/tika/fork/ForkParserTest.java
index 1759d5b..9167073 100644
--- a/tika-core/src/test/java/org/apache/tika/fork/ForkParserTest.java
+++ b/tika-core/src/test/java/org/apache/tika/fork/ForkParserTest.java
@@ -208,9 +208,12 @@ public class ForkParserTest extends TikaTest {
     }
 
     @Test
-    public void testPulse() throws Exception {
-        //test default 5000 ms
+    public void testPulseAndTimeouts() throws Exception {
+
         ForkParser forkParser = new ForkParser(ForkParserTest.class.getClassLoader(), new MockParser());
+        forkParser.setServerPulseMillis(500);
+        forkParser.setServerParseTimeoutMillis(5000);
+        forkParser.setServerWaitTimeoutMillis(60000);
         String sleepCommand = "<mock>\n" +
                 "    <write element=\"p\">Hello, World!</write>\n" +
                 "    <hang millis=\"11000\" heavy=\"false\" interruptible=\"false\" />\n" +
@@ -228,6 +231,7 @@ public class ForkParserTest extends TikaTest {
         //test setting very short pulse (10 ms) and a parser that takes at least 1000 ms
         forkParser = new ForkParser(ForkParserTest.class.getClassLoader(), new MockParser());
         forkParser.setServerPulseMillis(10);
+        forkParser.setServerParseTimeoutMillis(100);
         sleepCommand = "<mock>\n" +
                 "    <write element=\"p\">Hello, World!</write>\n" +
                 "    <hang millis=\"1000\" heavy=\"false\" interruptible=\"false\" />\n" +
@@ -271,7 +275,7 @@ public class ForkParserTest extends TikaTest {
         ForkParser fork = new ForkParser(ForkParserTest.class.getClassLoader(), wrapper);
         Metadata metadata = new Metadata();
         ParseContext context = new ParseContext();
-        try (InputStream is = getClass().getResourceAsStream("basic_embedded.xml")) {
+        try (InputStream is = getClass().getResourceAsStream("/test-documents/basic_embedded.xml")) {
             fork.parse(is, handler, metadata, context);
         } finally {
             fork.close();
@@ -298,7 +302,7 @@ public class ForkParserTest extends TikaTest {
         ForkParser fork = new ForkParser(ForkParserTest.class.getClassLoader(), wrapper);
         Metadata metadata = new Metadata();
         ParseContext context = new ParseContext();
-        try (InputStream is = getClass().getResourceAsStream("embedded_with_npe.xml")) {
+        try (InputStream is = getClass().getResourceAsStream("/test-documents/embedded_with_npe.xml")) {
             fork.parse(is, handler, metadata, context);
         } finally {
             fork.close();
@@ -326,7 +330,7 @@ public class ForkParserTest extends TikaTest {
         ForkParser fork = new ForkParser(ForkParserTest.class.getClassLoader(), wrapper);
         Metadata metadata = new Metadata();
         ParseContext context = new ParseContext();
-        try (InputStream is = getClass().getResourceAsStream("embedded_then_npe.xml")) {
+        try (InputStream is = getClass().getResourceAsStream("/test-documents/embedded_then_npe.xml")) {
             fork.parse(is, handler, metadata, context);
             fail();
         } catch (TikaException e) {
@@ -354,7 +358,7 @@ public class ForkParserTest extends TikaTest {
         Path target = Files.createTempFile("fork-to-file-handler-", ".txt");
         try {
             ForkParser forkParser = null;
-            try (InputStream is = this.getClass().getResourceAsStream("basic_embedded.xml")) {
+            try (InputStream is = this.getClass().getResourceAsStream("/test-documents/basic_embedded.xml")) {
                 RecursiveParserWrapper wrapper = new RecursiveParserWrapper(new AutoDetectParser());
                 ToFileHandler toFileHandler = new ToFileHandler(new SBContentHandlerFactory(), target.toFile());
                 forkParser = new ForkParser(ForkParserTest.class.getClassLoader(), wrapper);
@@ -392,7 +396,7 @@ public class ForkParserTest extends TikaTest {
         ForkParser fork = new ForkParser(ForkParserTest.class.getClassLoader(), wrapper);
         Metadata metadata = new Metadata();
         ParseContext context = new ParseContext();
-        try (InputStream is = getClass().getResourceAsStream("basic_embedded.xml")) {
+        try (InputStream is = getClass().getResourceAsStream("/test-documents/basic_embedded.xml")) {
             fork.parse(is, handler, metadata, context);
         } finally {
             fork.close();
@@ -422,7 +426,7 @@ public class ForkParserTest extends TikaTest {
         ForkParser fork = new ForkParser(ForkParserTest.class.getClassLoader(), wrapper);
         Metadata metadata = new Metadata();
         ParseContext context = new ParseContext();
-        try (InputStream is = getClass().getResourceAsStream("embedded_then_npe.xml")) {
+        try (InputStream is = getClass().getResourceAsStream("/test-documents/embedded_then_npe.xml")) {
             fork.parse(is, handler, metadata, context);
             fail();
         } catch (TikaException e) {
diff --git a/tika-core/src/test/java/org/apache/tika/fork/ForkParserTikaBinTest.java b/tika-core/src/test/java/org/apache/tika/fork/ForkParserTikaBinTest.java
index d3e8336..1047504 100644
--- a/tika-core/src/test/java/org/apache/tika/fork/ForkParserTikaBinTest.java
+++ b/tika-core/src/test/java/org/apache/tika/fork/ForkParserTikaBinTest.java
@@ -24,7 +24,6 @@ import org.apache.tika.metadata.TikaCoreProperties;
 import org.apache.tika.parser.AutoDetectParserFactory;
 import org.apache.tika.parser.ParseContext;
 import org.apache.tika.sax.ToXMLContentHandler;
-import org.apache.tika.utils.XMLReaderUtils;
 import org.junit.AfterClass;
 import org.junit.BeforeClass;
 import org.junit.Test;
@@ -49,15 +48,15 @@ import java.util.jar.JarEntry;
 import java.util.jar.JarOutputStream;
 
 import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertNotSame;
-import static org.junit.Assert.assertTrue;
-import static org.junit.Assert.fail;
 
 public class ForkParserTikaBinTest extends TikaTest {
     private static Path JAR_DIR;
     private static final String JAR_FILE_NAME = "mock-tika-app.jar";
     private static Path JAR_FILE;
 
+    @SuppressWarnings("unchecked")
+    private static final Map<String, String> EMPTY_MAP = Collections.EMPTY_MAP;
+
     @BeforeClass
     public static void bootstrapJar() throws Exception {
         JAR_DIR = Files.createTempDirectory("tika-fork-tikabin-");
@@ -87,7 +86,6 @@ public class ForkParserTikaBinTest extends TikaTest {
 
             jarOs.putNextEntry(new JarEntry("META-INF/services/org.apache.tika.parser.Parser"));
             jarOs.write("org.apache.tika.parser.mock.VowelParser\n".getBytes(StandardCharsets.UTF_8));
-
         }
 
         Path tikaConfigVowelParser = JAR_DIR.resolve("TIKA_2653-iou.xml");
@@ -109,7 +107,7 @@ public class ForkParserTikaBinTest extends TikaTest {
     @Test
     public void testExplicitParserFactory() throws Exception {
         XMLResult xmlResult = getXML(new ParserFactoryFactory("org.apache.tika.parser.mock.MockParserFactory",
-                Collections.EMPTY_MAP));
+                EMPTY_MAP));
         assertContains("hello world!", xmlResult.xml);
         assertEquals("Nikolai Lobachevsky", xmlResult.metadata.get(TikaCoreProperties.CREATOR));
     }
@@ -118,7 +116,7 @@ public class ForkParserTikaBinTest extends TikaTest {
     public void testVowelParserAsDefault() throws Exception {
         ParserFactoryFactory pff = new ParserFactoryFactory(
                 "org.apache.tika.parser.AutoDetectParserFactory",
-                Collections.EMPTY_MAP);
+                EMPTY_MAP);
         XMLResult xmlResult = getXML(pff);
         assertContains("eooeuiooueoeeao", xmlResult.xml);
         assertEquals("Nikolai Lobachevsky", xmlResult.metadata.get(TikaCoreProperties.CREATOR));
@@ -155,7 +153,7 @@ public class ForkParserTikaBinTest extends TikaTest {
 
         ParserFactoryFactory pff = new ParserFactoryFactory(
                 "org.apache.tika.parser.AutoDetectParserFactory",
-                Collections.EMPTY_MAP);
+                EMPTY_MAP);
         XMLResult xmlResult = getXML(pff, this.getClass().getClassLoader(), new UpperCasingContentHandler());
         assertContains("EOOEUIOOUEOEEAO", xmlResult.xml);
         assertEquals("Nikolai Lobachevsky", xmlResult.metadata.get(TikaCoreProperties.CREATOR));
@@ -192,15 +190,14 @@ public class ForkParserTikaBinTest extends TikaTest {
     private static List<Class> getClasses(String packageName)
             throws ClassNotFoundException, IOException {
         ClassLoader classLoader = Thread.currentThread().getContextClassLoader();
-        assert classLoader != null;
         String path = packageName.replace('.', '/');
         Enumeration<URL> resources = classLoader.getResources(path);
-        List<File> dirs = new ArrayList();
+        List<File> dirs = new ArrayList<>();
         while (resources.hasMoreElements()) {
             URL resource = resources.nextElement();
             dirs.add(new File(resource.getFile().replaceAll("%20", " ")));
         }
-        ArrayList classes = new ArrayList();
+        ArrayList<Class> classes = new ArrayList<>();
         for (File directory : dirs) {
             classes.addAll(findClasses(directory, packageName));
         }
@@ -208,16 +205,18 @@ public class ForkParserTikaBinTest extends TikaTest {
     }
 
     private static List<Class> findClasses(File dir, String packageName) throws ClassNotFoundException {
-        List<Class> classes = new ArrayList();
+        List<Class> classes = new ArrayList<>();
         if (!dir.exists()) {
             return classes;
         }
         File[] files = dir.listFiles();
         for (File file : files) {
             if (file.isDirectory()) {
-                assert !file.getName().contains(".");
                 classes.addAll(findClasses(file, packageName + "." + file.getName()));
             } else if (file.getName().endsWith(".class")) {
+                //exclude TypeDetectionBenchmark because it is not serializable
+                //exclude UpperCasingContentHandler because we want to test that
+                //we can serialize it from the parent process into the child process
                 if (! file.getName().contains("TypeDetectionBenchmark") &&
                         !file.getName().contains("UpperCasingContentHandler")) {
                     classes.add(Class.forName(packageName + '.' + file.getName().substring(0, file.getName().length() - 6)));
diff --git a/tika-core/src/test/resources/org/apache/tika/fork/basic_embedded.xml b/tika-core/src/test/resources/test-documents/basic_embedded.xml
similarity index 100%
rename from tika-core/src/test/resources/org/apache/tika/fork/basic_embedded.xml
rename to tika-core/src/test/resources/test-documents/basic_embedded.xml
diff --git a/tika-core/src/test/resources/org/apache/tika/fork/embedded_then_npe.xml b/tika-core/src/test/resources/test-documents/embedded_then_npe.xml
similarity index 100%
rename from tika-core/src/test/resources/org/apache/tika/fork/embedded_then_npe.xml
rename to tika-core/src/test/resources/test-documents/embedded_then_npe.xml
diff --git a/tika-core/src/test/resources/org/apache/tika/fork/embedded_with_npe.xml b/tika-core/src/test/resources/test-documents/embedded_with_npe.xml
similarity index 100%
rename from tika-core/src/test/resources/org/apache/tika/fork/embedded_with_npe.xml
rename to tika-core/src/test/resources/test-documents/embedded_with_npe.xml

-- 
To stop receiving notification emails like this one, please contact
tallison@apache.org.