You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2018/06/01 12:51:13 UTC
[tika] branch master updated: TIKA-2656 -- allow absolute timeout
for ForkParser
This is an automated email from the ASF dual-hosted git repository.
tallison pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/tika.git
The following commit(s) were added to refs/heads/master by this push:
new 929dcd7 TIKA-2656 -- allow absolute timeout for ForkParser
929dcd7 is described below
commit 929dcd75b8480a2a2d19a1801a88973b5e9358ac
Author: tballison <ta...@mitre.org>
AuthorDate: Fri Jun 1 08:50:47 2018 -0400
TIKA-2656 -- allow absolute timeout for ForkParser
---
CHANGES.txt | 3 +
.../main/java/org/apache/tika/fork/ForkClient.java | 79 ++++-----------
.../main/java/org/apache/tika/fork/ForkParser.java | 42 ++++++--
.../main/java/org/apache/tika/fork/ForkServer.java | 110 ++++++++++++---------
.../java/org/apache/tika/fork/TimeoutLimits.java | 43 ++++++++
.../java/org/apache/tika/fork/ForkParserTest.java | 20 ++--
.../apache/tika/fork/ForkParserTikaBinTest.java | 25 +++--
.../fork => test-documents}/basic_embedded.xml | 0
.../fork => test-documents}/embedded_then_npe.xml | 0
.../fork => test-documents}/embedded_with_npe.xml | 0
10 files changed, 189 insertions(+), 133 deletions(-)
diff --git a/CHANGES.txt b/CHANGES.txt
index 891f852..c6503f3 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -7,6 +7,9 @@ Release 2.0.0 - ???
Release 1.19 ???
+ * Add absolute timeout to ForkParser rather than testing
+ for active (TIKA-2656).
+
* Make the RecursiveParserWrapper work with the ForkParser (TIKA-2655).
* Allow the ForkParser to specify a directory containing tika-app.jar
diff --git a/tika-core/src/main/java/org/apache/tika/fork/ForkClient.java b/tika-core/src/main/java/org/apache/tika/fork/ForkClient.java
index fc86784..a79de48 100644
--- a/tika-core/src/main/java/org/apache/tika/fork/ForkClient.java
+++ b/tika-core/src/main/java/org/apache/tika/fork/ForkClient.java
@@ -53,65 +53,21 @@ class ForkClient {
private final DataInputStream input;
- private final InputStream error;
-
- public ForkClient(Path tikaDir, ParserFactoryFactory parserFactoryFactory, List<String> java, long serverPulseMillis) throws IOException, TikaException {
- jar = null;
- loader = null;
- boolean ok = false;
- ProcessBuilder builder = new ProcessBuilder();
- List<String> command = new ArrayList<>();
- command.addAll(java);
- command.add("-cp");
- String dirString = tikaDir.toAbsolutePath().toString();
- if (!dirString.endsWith("/")) {
- dirString += "/*";
- } else {
- dirString += "/";
- }
- dirString = ProcessUtils.escapeCommandLine(dirString);
- command.add(dirString);
- command.add("org.apache.tika.fork.ForkServer");
- command.add(Long.toString(serverPulseMillis));
- builder.command(command);
- builder.redirectError(ProcessBuilder.Redirect.INHERIT);
- try {
- this.process = builder.start();
-
- this.output = new DataOutputStream(process.getOutputStream());
- this.input = new DataInputStream(process.getInputStream());
- this.error = process.getErrorStream();
-
- waitForStartBeacon();
- output.writeByte(ForkServer.INIT_PARSER_FACTORY_FACTORY);
- output.flush();
- sendObject(parserFactoryFactory, resources);
-
- waitForStartBeacon();
-
- ok = true;
- } catch (Throwable t) {
- t.printStackTrace();
- throw t;
- } finally {
- if (!ok) {
- close();
- }
- }
+ public ForkClient(Path tikaDir, ParserFactoryFactory parserFactoryFactory, List<String> java,
+ TimeoutLimits timeoutLimits) throws IOException, TikaException {
+ this(tikaDir, parserFactoryFactory, null, java, timeoutLimits);
}
-
/**
*
* @param tikaDir directory containing jars from which to start the child server and load the Parser
* @param parserFactoryFactory factory to send to child process to build parser upon arrival
* @param classLoader class loader to use for non-parser resource (content-handler, etc.)
* @param java java commandline to use for the commandline server
- * @param serverPulseMillis how often to check if the server has been active
* @throws IOException
* @throws TikaException
*/
public ForkClient(Path tikaDir, ParserFactoryFactory parserFactoryFactory, ClassLoader classLoader,
- List<String> java, long serverPulseMillis) throws IOException, TikaException {
+ List<String> java, TimeoutLimits timeoutLimits) throws IOException, TikaException {
jar = null;
loader = null;
boolean ok = false;
@@ -128,7 +84,9 @@ class ForkClient {
dirString = ProcessUtils.escapeCommandLine(dirString);
command.add(dirString);
command.add("org.apache.tika.fork.ForkServer");
- command.add(Long.toString(serverPulseMillis));
+ command.add(Long.toString(timeoutLimits.getPulseMS()));
+ command.add(Long.toString(timeoutLimits.getParseTimeoutMS()));
+ command.add(Long.toString(timeoutLimits.getWaitTimeoutMS()));
builder.command(command);
builder.redirectError(ProcessBuilder.Redirect.INHERIT);
try {
@@ -136,13 +94,18 @@ class ForkClient {
this.output = new DataOutputStream(process.getOutputStream());
this.input = new DataInputStream(process.getInputStream());
- this.error = process.getErrorStream();
waitForStartBeacon();
- output.writeByte(ForkServer.INIT_PARSER_FACTORY_FACTORY_LOADER);
+ if (classLoader != null) {
+ output.writeByte(ForkServer.INIT_PARSER_FACTORY_FACTORY_LOADER);
+ } else {
+ output.writeByte(ForkServer.INIT_PARSER_FACTORY_FACTORY);
+ }
output.flush();
sendObject(parserFactoryFactory, resources);
- sendObject(classLoader, resources);
+ if (classLoader != null) {
+ sendObject(classLoader, resources);
+ }
waitForStartBeacon();
ok = true;
} catch (Throwable t) {
@@ -156,7 +119,7 @@ class ForkClient {
}
- public ForkClient(ClassLoader loader, Object object, List<String> java, long serverPulseMillis)
+ public ForkClient(ClassLoader loader, Object object, List<String> java, TimeoutLimits timeoutLimits)
throws IOException, TikaException {
boolean ok = false;
try {
@@ -168,14 +131,15 @@ class ForkClient {
command.addAll(java);
command.add("-jar");
command.add(jar.getPath());
- command.add(Long.toString(serverPulseMillis));
+ command.add(Long.toString(timeoutLimits.getPulseMS()));
+ command.add(Long.toString(timeoutLimits.getParseTimeoutMS()));
+ command.add(Long.toString(timeoutLimits.getWaitTimeoutMS()));
builder.command(command);
builder.redirectError(ProcessBuilder.Redirect.INHERIT);
this.process = builder.start();
this.output = new DataOutputStream(process.getOutputStream());
this.input = new DataInputStream(process.getInputStream());
- this.error = process.getErrorStream();
waitForStartBeacon();
output.writeByte(ForkServer.INIT_LOADER_PARSER);
@@ -283,13 +247,10 @@ class ForkClient {
if (input != null) {
input.close();
}
- if (error != null) {
- error.close();
- }
} catch (IOException ignore) {
}
if (process != null) {
- process.destroy();
+ process.destroyForcibly();
try {
//TIKA-1933
process.waitFor();
diff --git a/tika-core/src/main/java/org/apache/tika/fork/ForkParser.java b/tika-core/src/main/java/org/apache/tika/fork/ForkParser.java
index 3df9826..da0b573 100644
--- a/tika-core/src/main/java/org/apache/tika/fork/ForkParser.java
+++ b/tika-core/src/main/java/org/apache/tika/fork/ForkParser.java
@@ -27,6 +27,7 @@ import java.util.List;
import java.util.Queue;
import java.util.Set;
+import org.apache.tika.config.Field;
import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.mime.MediaType;
@@ -57,13 +58,19 @@ public class ForkParser extends AbstractParser {
private List<String> java = Arrays.asList("java", "-Xmx32m");
/** Process pool size */
+ @Field
private int poolSize = 5;
private int currentlyInUse = 0;
private final Queue<ForkClient> pool = new LinkedList<>();
- private long serverPulseMillis = 5000;
+ @Field
+ private long serverPulseMillis = 1000;
+ @Field
+ private long serverParseTimeoutMillis = 60000;
+ @Field
+ private long serverWaitTimeoutMillis = 60000;
/**
* If you have a directory with, say, tika-app.jar and you want the child process/server to build a parser
@@ -319,13 +326,13 @@ public class ForkParser extends AbstractParser {
}
private ForkClient newClient() throws IOException, TikaException {
-
+ TimeoutLimits timeoutLimits = new TimeoutLimits(serverPulseMillis, serverParseTimeoutMillis, serverWaitTimeoutMillis);
if (loader == null && parser == null && tikaBin != null && parserFactoryFactory != null) {
- return new ForkClient(tikaBin, parserFactoryFactory, java, serverPulseMillis);
+ return new ForkClient(tikaBin, parserFactoryFactory, java, timeoutLimits);
} else if (loader != null && parser != null && tikaBin == null && parserFactoryFactory == null) {
- return new ForkClient(loader, parser, java, serverPulseMillis);
+ return new ForkClient(loader, parser, java, timeoutLimits);
} else if (loader != null && parser == null && tikaBin != null && parserFactoryFactory != null) {
- return new ForkClient(tikaBin, parserFactoryFactory, loader, java, serverPulseMillis);
+ return new ForkClient(tikaBin, parserFactoryFactory, loader, java, timeoutLimits);
} else {
//TODO: make this more useful
throw new IllegalStateException("Unexpected combination of state items");
@@ -345,8 +352,8 @@ public class ForkParser extends AbstractParser {
/**
* The amount of time in milliseconds that the server
- * should wait for any input or output. If it receives no
- * input or output in this amount of time, it will shutdown.
+ * should wait before checking to see if the parse has timed out
+ * or if the wait has timed out
* The default is 1 second.
*
* @param serverPulseMillis milliseconds to sleep before checking if there has been any activity
@@ -355,4 +362,25 @@ public class ForkParser extends AbstractParser {
this.serverPulseMillis = serverPulseMillis;
}
+ /**
+ * The maximum amount of time allowed for the server to try to parse a file.
+ * If more than this time elapses, the server shuts down, and the ForkParser
+ * throws an exception.
+ *
+ * @param serverParseTimeoutMillis
+ */
+ public void setServerParseTimeoutMillis(long serverParseTimeoutMillis) {
+ this.serverParseTimeoutMillis = serverParseTimeoutMillis;
+ }
+
+ /**
+ * The maximum amount of time allowed for the server to wait for a new request to parse
+ * a file. The server will shut down after this amount of time, and a new server will have
+ * to be started by a new client.
+ * @param serverWaitTimeoutMillis
+ */
+ public void setServerWaitTimeoutMillis(long serverWaitTimeoutMillis) {
+ this.serverWaitTimeoutMillis = serverWaitTimeoutMillis;
+ }
+
}
diff --git a/tika-core/src/main/java/org/apache/tika/fork/ForkServer.java b/tika-core/src/main/java/org/apache/tika/fork/ForkServer.java
index f4c49da..18345db 100644
--- a/tika-core/src/main/java/org/apache/tika/fork/ForkServer.java
+++ b/tika-core/src/main/java/org/apache/tika/fork/ForkServer.java
@@ -27,6 +27,7 @@ import java.io.IOException;
import java.io.InputStream;
import java.io.NotSerializableException;
import java.io.OutputStream;
+import java.io.Serializable;
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
import java.net.URL;
@@ -34,7 +35,7 @@ import java.util.zip.CheckedInputStream;
import java.util.zip.CheckedOutputStream;
import java.util.zip.Checksum;
-class ForkServer implements Runnable, Checksum {
+class ForkServer implements Runnable {
public static final byte ERROR = -1;
@@ -57,6 +58,11 @@ class ForkServer implements Runnable, Checksum {
//milliseconds to sleep between checks of whether the current parse or the idle wait
//has exceeded its timeout; if either has, the server shuts down.
private long serverPulseMillis = 5000;
+ private long serverParserTimeoutMillis = 60000;
+ private long serverWaitTimeoutMillis = 60000;
+
+ private Object[] lock = new Object[0];
+
/**
* Starts a forked server process using the standard input and output
* streams for communication with the parent process. Any attempts by
@@ -67,13 +73,14 @@ class ForkServer implements Runnable, Checksum {
* @throws Exception if the server could not be started
*/
public static void main(String[] args) throws Exception {
- long serverPulseMillis = -1;
- if (args.length > 0) {
- serverPulseMillis = Long.parseLong(args[0]);
- }
+ long serverPulseMillis = Long.parseLong(args[0]);
+ long serverParseTimeoutMillis = Long.parseLong(args[1]);
+ long serverWaitTimeoutMillis = Long.parseLong(args[2]);
+
URL.setURLStreamHandlerFactory(new MemoryURLStreamHandlerFactory());
- ForkServer server = new ForkServer(System.in, System.out, serverPulseMillis);
+ ForkServer server = new ForkServer(System.in, System.out,
+ serverPulseMillis, serverParseTimeoutMillis, serverWaitTimeoutMillis);
System.setIn(new ByteArrayInputStream(new byte[0]));
System.setOut(System.err);
@@ -97,6 +104,9 @@ class ForkServer implements Runnable, Checksum {
private Object parser;
private ClassLoader classLoader;
+ private boolean parsing = false;
+ private long since;
+
/**
* Sets up a forked server instance using the given stdin/out
* communication channel.
@@ -105,19 +115,31 @@ class ForkServer implements Runnable, Checksum {
* @param output output stream for writing to the parent process
* @throws IOException if the server instance could not be created
*/
- public ForkServer(InputStream input, OutputStream output, long serverPulseMillis)
+ public ForkServer(InputStream input, OutputStream output,
+ long serverPulseMillis, long serverParserTimeoutMillis, long serverWaitTimeoutMillis)
throws IOException {
this.input =
- new DataInputStream(new CheckedInputStream(input, this));
+ new DataInputStream(input);
this.output =
- new DataOutputStream(new CheckedOutputStream(output, this));
+ new DataOutputStream(output);
this.serverPulseMillis = serverPulseMillis;
+ this.serverParserTimeoutMillis = serverParserTimeoutMillis;
+ this.serverWaitTimeoutMillis = serverWaitTimeoutMillis;
+ this.parsing = false;
+ this.since = System.currentTimeMillis();
}
public void run() {
try {
- while (active) {
- active = false;
+ while (true) {
+ synchronized (lock) {
+ long elapsed = System.currentTimeMillis()-since;
+ if (parsing && elapsed > serverParserTimeoutMillis) {
+ break;
+ } else if (!parsing && serverWaitTimeoutMillis > 0 && elapsed > serverWaitTimeoutMillis) {
+ break;
+ }
+ }
Thread.sleep(serverPulseMillis);
}
System.exit(0);
@@ -126,6 +148,7 @@ class ForkServer implements Runnable, Checksum {
}
public void processRequests() {
+ //initialize
try {
initializeParserAndLoader();
} catch (Throwable t) {
@@ -140,6 +163,7 @@ class ForkServer implements Runnable, Checksum {
}
return;
}
+ //main loop
try {
while (true) {
int request = input.read();
@@ -210,26 +234,38 @@ class ForkServer implements Runnable, Checksum {
}
private void call(ClassLoader loader, Object object) throws Exception {
- Method method = getMethod(object, input.readUTF());
- Object[] args =
- new Object[method.getParameterTypes().length];
- for (int i = 0; i < args.length; i++) {
- args[i] = readObject(loader);
+ synchronized (lock) {
+ parsing = true;
+ since = System.currentTimeMillis();
}
try {
- method.invoke(object, args);
- output.write(DONE);
- } catch (InvocationTargetException e) {
- output.write(ERROR);
- // Try to send the underlying Exception itself
- Throwable toSend = e.getCause();
+ Method method = getMethod(object, input.readUTF());
+ Object[] args =
+ new Object[method.getParameterTypes().length];
+ for (int i = 0; i < args.length; i++) {
+ args[i] = readObject(loader);
+ }
try {
- ForkObjectInputStream.sendObject(toSend, output);
- } catch (NotSerializableException nse) {
- // Need to build a serializable version of it
- TikaException te = new TikaException( toSend.getMessage() );
- te.setStackTrace( toSend.getStackTrace() );
- ForkObjectInputStream.sendObject(te, output);
+ method.invoke(object, args);
+ output.write(DONE);
+ } catch (InvocationTargetException e) {
+ output.write(ERROR);
+ // Try to send the underlying Exception itself
+ Throwable toSend = e.getCause();
+ try {
+ ForkObjectInputStream.sendObject(toSend, output);
+ } catch (NotSerializableException nse) {
+ // Need to build a serializable version of it
+ TikaException te = new TikaException(toSend.getMessage());
+ te.setStackTrace(toSend.getStackTrace());
+ ForkObjectInputStream.sendObject(te, output);
+ }
+
+ }
+ } finally {
+ synchronized (lock) {
+ parsing = false;
+ since = System.currentTimeMillis();
}
}
}
@@ -271,22 +307,4 @@ class ForkServer implements Runnable, Checksum {
return object;
}
-
- //------------------------------------------------------------< Checksum >
-
- public void update(int b) {
- active = true;
- }
-
- public void update(byte[] b, int off, int len) {
- active = true;
- }
-
- public long getValue() {
- return 0;
- }
-
- public void reset() {
- }
-
}
diff --git a/tika-core/src/main/java/org/apache/tika/fork/TimeoutLimits.java b/tika-core/src/main/java/org/apache/tika/fork/TimeoutLimits.java
new file mode 100644
index 0000000..6610437
--- /dev/null
+++ b/tika-core/src/main/java/org/apache/tika/fork/TimeoutLimits.java
@@ -0,0 +1,43 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.fork;
+
+class TimeoutLimits {
+
+ private final long pulseMS;
+ private final long parseTimeoutMS;
+ private final long waitTimeoutMS;
+
+
+ TimeoutLimits(long pulseMS, long parseTimeoutMS, long waitTimeoutMS) {
+ this.pulseMS = pulseMS;
+ this.parseTimeoutMS = parseTimeoutMS;
+ this.waitTimeoutMS = waitTimeoutMS;
+ }
+
+ public long getPulseMS() {
+ return pulseMS;
+ }
+
+ public long getParseTimeoutMS() {
+ return parseTimeoutMS;
+ }
+
+ public long getWaitTimeoutMS() {
+ return waitTimeoutMS;
+ }
+}
diff --git a/tika-core/src/test/java/org/apache/tika/fork/ForkParserTest.java b/tika-core/src/test/java/org/apache/tika/fork/ForkParserTest.java
index 1759d5b..9167073 100644
--- a/tika-core/src/test/java/org/apache/tika/fork/ForkParserTest.java
+++ b/tika-core/src/test/java/org/apache/tika/fork/ForkParserTest.java
@@ -208,9 +208,12 @@ public class ForkParserTest extends TikaTest {
}
@Test
- public void testPulse() throws Exception {
- //test default 5000 ms
+ public void testPulseAndTimeouts() throws Exception {
+
ForkParser forkParser = new ForkParser(ForkParserTest.class.getClassLoader(), new MockParser());
+ forkParser.setServerPulseMillis(500);
+ forkParser.setServerParseTimeoutMillis(5000);
+ forkParser.setServerWaitTimeoutMillis(60000);
String sleepCommand = "<mock>\n" +
" <write element=\"p\">Hello, World!</write>\n" +
" <hang millis=\"11000\" heavy=\"false\" interruptible=\"false\" />\n" +
@@ -228,6 +231,7 @@ public class ForkParserTest extends TikaTest {
//test setting very short pulse (10 ms) and a parser that takes at least 1000 ms
forkParser = new ForkParser(ForkParserTest.class.getClassLoader(), new MockParser());
forkParser.setServerPulseMillis(10);
+ forkParser.setServerParseTimeoutMillis(100);
sleepCommand = "<mock>\n" +
" <write element=\"p\">Hello, World!</write>\n" +
" <hang millis=\"1000\" heavy=\"false\" interruptible=\"false\" />\n" +
@@ -271,7 +275,7 @@ public class ForkParserTest extends TikaTest {
ForkParser fork = new ForkParser(ForkParserTest.class.getClassLoader(), wrapper);
Metadata metadata = new Metadata();
ParseContext context = new ParseContext();
- try (InputStream is = getClass().getResourceAsStream("basic_embedded.xml")) {
+ try (InputStream is = getClass().getResourceAsStream("/test-documents/basic_embedded.xml")) {
fork.parse(is, handler, metadata, context);
} finally {
fork.close();
@@ -298,7 +302,7 @@ public class ForkParserTest extends TikaTest {
ForkParser fork = new ForkParser(ForkParserTest.class.getClassLoader(), wrapper);
Metadata metadata = new Metadata();
ParseContext context = new ParseContext();
- try (InputStream is = getClass().getResourceAsStream("embedded_with_npe.xml")) {
+ try (InputStream is = getClass().getResourceAsStream("/test-documents/embedded_with_npe.xml")) {
fork.parse(is, handler, metadata, context);
} finally {
fork.close();
@@ -326,7 +330,7 @@ public class ForkParserTest extends TikaTest {
ForkParser fork = new ForkParser(ForkParserTest.class.getClassLoader(), wrapper);
Metadata metadata = new Metadata();
ParseContext context = new ParseContext();
- try (InputStream is = getClass().getResourceAsStream("embedded_then_npe.xml")) {
+ try (InputStream is = getClass().getResourceAsStream("/test-documents/embedded_then_npe.xml")) {
fork.parse(is, handler, metadata, context);
fail();
} catch (TikaException e) {
@@ -354,7 +358,7 @@ public class ForkParserTest extends TikaTest {
Path target = Files.createTempFile("fork-to-file-handler-", ".txt");
try {
ForkParser forkParser = null;
- try (InputStream is = this.getClass().getResourceAsStream("basic_embedded.xml")) {
+ try (InputStream is = this.getClass().getResourceAsStream("/test-documents/basic_embedded.xml")) {
RecursiveParserWrapper wrapper = new RecursiveParserWrapper(new AutoDetectParser());
ToFileHandler toFileHandler = new ToFileHandler(new SBContentHandlerFactory(), target.toFile());
forkParser = new ForkParser(ForkParserTest.class.getClassLoader(), wrapper);
@@ -392,7 +396,7 @@ public class ForkParserTest extends TikaTest {
ForkParser fork = new ForkParser(ForkParserTest.class.getClassLoader(), wrapper);
Metadata metadata = new Metadata();
ParseContext context = new ParseContext();
- try (InputStream is = getClass().getResourceAsStream("basic_embedded.xml")) {
+ try (InputStream is = getClass().getResourceAsStream("/test-documents/basic_embedded.xml")) {
fork.parse(is, handler, metadata, context);
} finally {
fork.close();
@@ -422,7 +426,7 @@ public class ForkParserTest extends TikaTest {
ForkParser fork = new ForkParser(ForkParserTest.class.getClassLoader(), wrapper);
Metadata metadata = new Metadata();
ParseContext context = new ParseContext();
- try (InputStream is = getClass().getResourceAsStream("embedded_then_npe.xml")) {
+ try (InputStream is = getClass().getResourceAsStream("/test-documents/embedded_then_npe.xml")) {
fork.parse(is, handler, metadata, context);
fail();
} catch (TikaException e) {
diff --git a/tika-core/src/test/java/org/apache/tika/fork/ForkParserTikaBinTest.java b/tika-core/src/test/java/org/apache/tika/fork/ForkParserTikaBinTest.java
index d3e8336..1047504 100644
--- a/tika-core/src/test/java/org/apache/tika/fork/ForkParserTikaBinTest.java
+++ b/tika-core/src/test/java/org/apache/tika/fork/ForkParserTikaBinTest.java
@@ -24,7 +24,6 @@ import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.parser.AutoDetectParserFactory;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.sax.ToXMLContentHandler;
-import org.apache.tika.utils.XMLReaderUtils;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Test;
@@ -49,15 +48,15 @@ import java.util.jar.JarEntry;
import java.util.jar.JarOutputStream;
import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertNotSame;
-import static org.junit.Assert.assertTrue;
-import static org.junit.Assert.fail;
public class ForkParserTikaBinTest extends TikaTest {
private static Path JAR_DIR;
private static final String JAR_FILE_NAME = "mock-tika-app.jar";
private static Path JAR_FILE;
+ @SuppressWarnings("unchecked")
+ private static final Map<String, String> EMPTY_MAP = Collections.EMPTY_MAP;
+
@BeforeClass
public static void bootstrapJar() throws Exception {
JAR_DIR = Files.createTempDirectory("tika-fork-tikabin-");
@@ -87,7 +86,6 @@ public class ForkParserTikaBinTest extends TikaTest {
jarOs.putNextEntry(new JarEntry("META-INF/services/org.apache.tika.parser.Parser"));
jarOs.write("org.apache.tika.parser.mock.VowelParser\n".getBytes(StandardCharsets.UTF_8));
-
}
Path tikaConfigVowelParser = JAR_DIR.resolve("TIKA_2653-iou.xml");
@@ -109,7 +107,7 @@ public class ForkParserTikaBinTest extends TikaTest {
@Test
public void testExplicitParserFactory() throws Exception {
XMLResult xmlResult = getXML(new ParserFactoryFactory("org.apache.tika.parser.mock.MockParserFactory",
- Collections.EMPTY_MAP));
+ EMPTY_MAP));
assertContains("hello world!", xmlResult.xml);
assertEquals("Nikolai Lobachevsky", xmlResult.metadata.get(TikaCoreProperties.CREATOR));
}
@@ -118,7 +116,7 @@ public class ForkParserTikaBinTest extends TikaTest {
public void testVowelParserAsDefault() throws Exception {
ParserFactoryFactory pff = new ParserFactoryFactory(
"org.apache.tika.parser.AutoDetectParserFactory",
- Collections.EMPTY_MAP);
+ EMPTY_MAP);
XMLResult xmlResult = getXML(pff);
assertContains("eooeuiooueoeeao", xmlResult.xml);
assertEquals("Nikolai Lobachevsky", xmlResult.metadata.get(TikaCoreProperties.CREATOR));
@@ -155,7 +153,7 @@ public class ForkParserTikaBinTest extends TikaTest {
ParserFactoryFactory pff = new ParserFactoryFactory(
"org.apache.tika.parser.AutoDetectParserFactory",
- Collections.EMPTY_MAP);
+ EMPTY_MAP);
XMLResult xmlResult = getXML(pff, this.getClass().getClassLoader(), new UpperCasingContentHandler());
assertContains("EOOEUIOOUEOEEAO", xmlResult.xml);
assertEquals("Nikolai Lobachevsky", xmlResult.metadata.get(TikaCoreProperties.CREATOR));
@@ -192,15 +190,14 @@ public class ForkParserTikaBinTest extends TikaTest {
private static List<Class> getClasses(String packageName)
throws ClassNotFoundException, IOException {
ClassLoader classLoader = Thread.currentThread().getContextClassLoader();
- assert classLoader != null;
String path = packageName.replace('.', '/');
Enumeration<URL> resources = classLoader.getResources(path);
- List<File> dirs = new ArrayList();
+ List<File> dirs = new ArrayList<>();
while (resources.hasMoreElements()) {
URL resource = resources.nextElement();
dirs.add(new File(resource.getFile().replaceAll("%20", " ")));
}
- ArrayList classes = new ArrayList();
+ ArrayList<Class> classes = new ArrayList<>();
for (File directory : dirs) {
classes.addAll(findClasses(directory, packageName));
}
@@ -208,16 +205,18 @@ public class ForkParserTikaBinTest extends TikaTest {
}
private static List<Class> findClasses(File dir, String packageName) throws ClassNotFoundException {
- List<Class> classes = new ArrayList();
+ List<Class> classes = new ArrayList<>();
if (!dir.exists()) {
return classes;
}
File[] files = dir.listFiles();
for (File file : files) {
if (file.isDirectory()) {
- assert !file.getName().contains(".");
classes.addAll(findClasses(file, packageName + "." + file.getName()));
} else if (file.getName().endsWith(".class")) {
+ //exclude TypeDetectionBenchmark because it is not serializable
+ //exclude UpperCasingContentHandler because we want to test that
+ //we can serialize it from the parent process into the child process
if (! file.getName().contains("TypeDetectionBenchmark") &&
!file.getName().contains("UpperCasingContentHandler")) {
classes.add(Class.forName(packageName + '.' + file.getName().substring(0, file.getName().length() - 6)));
diff --git a/tika-core/src/test/resources/org/apache/tika/fork/basic_embedded.xml b/tika-core/src/test/resources/test-documents/basic_embedded.xml
similarity index 100%
rename from tika-core/src/test/resources/org/apache/tika/fork/basic_embedded.xml
rename to tika-core/src/test/resources/test-documents/basic_embedded.xml
diff --git a/tika-core/src/test/resources/org/apache/tika/fork/embedded_then_npe.xml b/tika-core/src/test/resources/test-documents/embedded_then_npe.xml
similarity index 100%
rename from tika-core/src/test/resources/org/apache/tika/fork/embedded_then_npe.xml
rename to tika-core/src/test/resources/test-documents/embedded_then_npe.xml
diff --git a/tika-core/src/test/resources/org/apache/tika/fork/embedded_with_npe.xml b/tika-core/src/test/resources/test-documents/embedded_with_npe.xml
similarity index 100%
rename from tika-core/src/test/resources/org/apache/tika/fork/embedded_with_npe.xml
rename to tika-core/src/test/resources/test-documents/embedded_with_npe.xml
--
To stop receiving notification emails like this one, please contact
tallison@apache.org.