You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2021/02/11 15:47:31 UTC
[tika] 01/02: TIKA-3293 -- WIP -- move to a config file for
tika-server
This is an automated email from the ASF dual-hosted git repository.
tallison pushed a commit to branch TIKA-3293
in repository https://gitbox.apache.org/repos/asf/tika.git
commit 9d1ebbc7fba568749c8f3b0b4fd239f3621818f6
Author: tballison <ta...@apache.org>
AuthorDate: Wed Feb 10 15:44:04 2021 -0500
TIKA-3293 -- WIP -- move to a config file for tika-server
---
.../tika/server/core/ServerStatusWatcher.java | 21 +-
.../tika/server/core/ServerTimeoutConfig.java | 136 -----
.../org/apache/tika/server/core/TikaServerCli.java | 242 ++-------
.../apache/tika/server/core/TikaServerConfig.java | 552 +++++++++++++++++++++
.../apache/tika/server/core/TikaServerProcess.java | 317 ++++++------
.../tika/server/core/TikaServerWatchDog.java | 60 +--
.../core/writer/MetadataListMessageBodyWriter.java | 1 -
.../main/resources/tika-server-config-default.xml | 97 ++++
.../org/apache/tika/server/core/CXFTestBase.java | 2 +-
.../tika/server/core/TikaServerConfigTest.java | 20 +
.../server/core/TikaServerIntegrationTest.java | 28 +-
.../configs/tika-config-server-badjvmargs.xml | 31 ++
.../resources/configs/tika-config-server-basic.xml | 30 ++
.../configs/tika-config-server-timeout-10000.xml | 26 +
.../test/resources/configs/tika-config-server.xml | 29 ++
15 files changed, 1014 insertions(+), 578 deletions(-)
diff --git a/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/ServerStatusWatcher.java b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/ServerStatusWatcher.java
index 7ac0ffd..a7a5cd7 100644
--- a/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/ServerStatusWatcher.java
+++ b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/ServerStatusWatcher.java
@@ -39,8 +39,7 @@ public class ServerStatusWatcher implements Runnable {
private static final Logger LOG = LoggerFactory.getLogger(ServerStatusWatcher.class);
private final ServerStatus serverStatus;
private final DataInputStream fromParent;
- private final long maxFiles;
- private final ServerTimeoutConfig serverTimeouts;
+ private final TikaServerConfig tikaServerConfig;
private final Path forkedStatusPath;
private final ByteBuffer statusBuffer = ByteBuffer.allocate(16);
@@ -50,11 +49,9 @@ public class ServerStatusWatcher implements Runnable {
public ServerStatusWatcher(ServerStatus serverStatus,
InputStream inputStream, Path forkedStatusPath,
- long maxFiles,
- ServerTimeoutConfig serverTimeouts) throws IOException {
+ TikaServerConfig tikaServerConfig) throws IOException {
this.serverStatus = serverStatus;
- this.maxFiles = maxFiles;
- this.serverTimeouts = serverTimeouts;
+ this.tikaServerConfig = tikaServerConfig;
this.forkedStatusPath = forkedStatusPath;
serverStatus.setStatus(ServerStatus.STATUS.OPERATING);
this.fromParent = new DataInputStream(inputStream);
@@ -112,7 +109,7 @@ public class ServerStatusWatcher implements Runnable {
try (FileChannel channel = FileChannel.open(forkedStatusPath,
StandardOpenOption.CREATE,
StandardOpenOption.WRITE)) {
- while (elapsed < serverTimeouts.getPingTimeoutMillis()) {
+ while (elapsed < tikaServerConfig.getPingTimeoutMillis()) {
try (FileLock lock = channel.tryLock()) {
if (lock != null) {
((Buffer) statusBuffer).position(0);
@@ -131,11 +128,11 @@ public class ServerStatusWatcher implements Runnable {
}
private void checkForHitMaxFiles() {
- if (maxFiles < 0) {
+ if (tikaServerConfig.getMaxFiles() < 0) {
return;
}
long filesProcessed = serverStatus.getFilesProcessed();
- if (filesProcessed >= maxFiles) {
+ if (filesProcessed >= tikaServerConfig.getMaxFiles()) {
serverStatus.setStatus(ServerStatus.STATUS.HIT_MAX_FILES);
}
}
@@ -144,7 +141,7 @@ public class ServerStatusWatcher implements Runnable {
Instant now = Instant.now();
for (TaskStatus status : serverStatus.getTasks().values()) {
long millisElapsed = Duration.between(status.started, now).toMillis();
- if (millisElapsed > serverTimeouts.getTaskTimeoutMillis()) {
+ if (millisElapsed > tikaServerConfig.getTaskTimeoutMillis()) {
serverStatus.setStatus(ServerStatus.STATUS.TIMEOUT);
if (status.fileName.isPresent()) {
LOG.error("Timeout task {}, millis elapsed {}, file {}" +
@@ -199,13 +196,13 @@ public class ServerStatusWatcher implements Runnable {
if (lastPing != null) {
long elapsed = Duration.between(lastPing, Instant.now()).toMillis();
- if (elapsed > serverTimeouts.getPingTimeoutMillis()) {
+ if (elapsed > tikaServerConfig.getPingTimeoutMillis()) {
serverStatus.setStatus(ServerStatus.STATUS.PARENT_EXCEPTION);
shutdown(ServerStatus.STATUS.PARENT_EXCEPTION);
}
}
try {
- Thread.sleep(serverTimeouts.getPingPulseMillis());
+ Thread.sleep(tikaServerConfig.getPingPulseMillis());
} catch (InterruptedException e) {
return;
}
diff --git a/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/ServerTimeoutConfig.java b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/ServerTimeoutConfig.java
deleted file mode 100644
index 53bcd64..0000000
--- a/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/ServerTimeoutConfig.java
+++ /dev/null
@@ -1,136 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.server.core;
-
-public class ServerTimeoutConfig {
-
- /*
- TODO: integrate these settings:
- * Number of milliseconds to wait to start forked process.
- public static final long DEFAULT_FORKED_PROCESS_STARTUP_MILLIS = 60000;
-
- * Maximum number of milliseconds to wait to shutdown forked process to allow
- * for current parses to complete.
- public static final long DEFAULT_FORKED_PROCESS_SHUTDOWN_MILLIS = 30000;
-
- private long forkedProcessStartupMillis = DEFAULT_FORKED_PROCESS_STARTUP_MILLIS;
-
- private long forkedProcessShutdownMillis = DEFAULT_FORKED_PROCESS_SHUTDOWN_MILLIS;
-
- */
-
-
-
- /**
- * If the forked process doesn't receive a ping or the parent doesn't
- * hear back from a ping in this amount of time, terminate and restart the forked process.
- */
- public static final long DEFAULT_PING_TIMEOUT_MILLIS = 30000;
-
- /**
- * How often should the parent try to ping the forked process to check status
- */
- public static final long DEFAULT_PING_PULSE_MILLIS = 500;
-
- /**
- * Number of milliseconds to wait per server task (parse, detect, unpack, translate,
- * etc.) before timing out and shutting down the forked process.
- */
- public static final long DEFAULT_TASK_TIMEOUT_MILLIS = 120000;
-
- /**
- * Number of milliseconds to wait for forked process to startup
- */
- public static final long DEFAULT_FORKED_STARTUP_MILLIS = 120000;
-
- private int maxRestarts = -1;
-
- private long taskTimeoutMillis = DEFAULT_TASK_TIMEOUT_MILLIS;
-
- private long pingTimeoutMillis = DEFAULT_PING_TIMEOUT_MILLIS;
-
- private long pingPulseMillis = DEFAULT_PING_PULSE_MILLIS;
-
- private long maxforkedStartupMillis = DEFAULT_FORKED_STARTUP_MILLIS;
-
-
- /**
- * How long to wait for a task before shutting down the forked server process
- * and restarting it.
- * @return
- */
- public long getTaskTimeoutMillis() {
- return taskTimeoutMillis;
- }
-
- /**
- *
- * @param taskTimeoutMillis number of milliseconds to allow per task
- * (parse, detection, unzipping, etc.)
- */
- public void setTaskTimeoutMillis(long taskTimeoutMillis) {
- this.taskTimeoutMillis = taskTimeoutMillis;
- }
-
- public long getPingTimeoutMillis() {
- return pingTimeoutMillis;
- }
-
- /**
- *
- * @param pingTimeoutMillis if the parent doesn't receive a response
- * in this amount of time, or
- * if the forked doesn't receive a ping
- * in this amount of time, restart the forked process
- */
- public void setPingTimeoutMillis(long pingTimeoutMillis) {
- this.pingTimeoutMillis = pingTimeoutMillis;
- }
-
- public long getPingPulseMillis() {
- return pingPulseMillis;
- }
-
- /**
- *
- * @param pingPulseMillis how often to test that the parent and/or forked is alive
- */
- public void setPingPulseMillis(long pingPulseMillis) {
- this.pingPulseMillis = pingPulseMillis;
- }
-
- public int getMaxRestarts() {
- return maxRestarts;
- }
-
- public void setMaxRestarts(int maxRestarts) {
- this.maxRestarts = maxRestarts;
- }
-
- /**
- * Maximum time in millis to allow for the forked process to startup
- * or restart
- * @return
- */
- public long getMaxForkedStartupMillis() {
- return maxforkedStartupMillis;
- }
-
- public void setMaxForkedStartupMillis(long maxForkedStartupMillis) {
- this.maxforkedStartupMillis = maxForkedStartupMillis;
- }
-}
diff --git a/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/TikaServerCli.java b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/TikaServerCli.java
index 14a2ff4..3e89f96 100644
--- a/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/TikaServerCli.java
+++ b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/TikaServerCli.java
@@ -19,9 +19,7 @@ package org.apache.tika.server.core;
import java.util.ArrayList;
import java.util.Arrays;
-import java.util.HashSet;
import java.util.List;
-import java.util.Set;
import java.util.UUID;
import java.util.concurrent.ExecutorCompletionService;
import java.util.concurrent.ExecutorService;
@@ -40,78 +38,27 @@ import org.apache.tika.Tika;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-public class TikaServerCli {
-
-
- //used in fork mode -- restart after processing this many files
- private static final long DEFAULT_MAX_FILES = 100000;
+import static org.apache.tika.server.core.TikaServerConfig.DEFAULT_HOST;
+import static org.apache.tika.server.core.TikaServerConfig.DEFAULT_PORT;
+public class TikaServerCli {
- public static final int DEFAULT_PORT = 9998;
- private static final int DEFAULT_DIGEST_MARK_LIMIT = 20*1024*1024;
- public static final String DEFAULT_HOST = "localhost";
- public static final Set<String> LOG_LEVELS = new HashSet<>(Arrays.asList("debug", "info"));
private static final Logger LOG = LoggerFactory.getLogger(TikaServerCli.class);
- private static final String UNSECURE_WARNING =
- "WARNING: You have chosen to run tika-server with unsecure features enabled.\n"+
- "Whoever has access to your service now has the same read permissions\n"+
- "as you've given your fetchers and the same write permissions as your emitters.\n" +
- "Users could request and receive a sensitive file from your\n" +
- "drive or a webpage from your intranet and/or send malicious content to\n" +
- " your emitter endpoints. See CVE-2015-3271.\n"+
- "Please make sure you know what you are doing.";
-
- private static final List<String> ONLY_IN_FORK_MODE =
- Arrays.asList(new String[] { "taskTimeoutMillis", "taskPulseMillis",
- "pingTimeoutMillis", "pingPulseMillis", "maxFiles", "javaHome", "maxRestarts",
- "numRestarts",
- "forkedStatusFile", "maxForkedStartupMillis", "tmpFilePrefix"});
-
private static Options getOptions() {
Options options = new Options();
- options.addOption("C", "cors", true, "origin allowed to make CORS requests (default=NONE)\nall allowed if \"all\"");
- options.addOption("h", "host", true, "host name (default = " + DEFAULT_HOST + ", use * for all)");
+ options.addOption("h", "host", true, "host name (default = "
+ + DEFAULT_HOST + ", use * for all)");
options.addOption("p", "port", true,
- "listen port(s) (default = " + DEFAULT_PORT + ").\n" +
+ "listen port(s) (default = 9998)\n" +
"Can specify multiple ports with inclusive ranges (e.g. 9990-9999)\n" +
"or with comma delimited list (e.g. 9996,9998,9995)");
- options.addOption("c", "config", true, "Tika Configuration file to override default config with.");
- options.addOption("d", "digest", true, "include digest in metadata, e.g. md5,sha1:32,sha256");
- options.addOption("dml", "digestMarkLimit", true, "max number of bytes to mark on stream for digest");
- options.addOption("l", "log", true, "request URI log level ('debug' or 'info')");
- options.addOption("s", "includeStack", false, "whether or not to return a stack trace\nif there is an exception during 'parse'");
- options.addOption("i", "id", true, "id to use for server in server status endpoint");
- options.addOption("status", false, "enable the status endpoint");
options.addOption("?", "help", false, "this help message");
- options.addOption("enableUnsecureFeatures", false, "this is required to enable fetchers and emitters. "+
- " The user acknowledges that fetchers and emitters introduce potential security vulnerabilities.");
- options.addOption("noFork", false, "legacy mode, less robust -- this starts up tika-server" +
- " without forking a process.");
- options.addOption("taskTimeoutMillis", true,
- "Not allowed in -noFork: how long to wait for a task (e.g. parse) to finish");
- options.addOption("taskPulseMillis", true,
- "Not allowed in -noFork: how often to check if a task has timed out.");
- options.addOption("pingTimeoutMillis", true,
- "Not allowed in -noFork: how long to wait to wait for a ping and/or ping response.");
- options.addOption("pingPulseMillis", true,
- "Not allowed in -noFork: how often to check if a ping has timed out.");
- options.addOption("maxForkedStartupMillis", true,
- "Not allowed in -noFork: Maximum number of millis to wait for the forked process to startup.");
- options.addOption("maxRestarts", true,
- "Not allowed in -noFork: how many times to restart forked process, default is -1 (always restart)");
- options.addOption("maxFiles", true,
- "Not allowed in -noFork: shutdown server after this many files (to handle parsers that might introduce " +
- "slowly building memory leaks); the default is "+DEFAULT_MAX_FILES +". Set to -1 to turn this off.");
- options.addOption("javaHome", true,
- "Not allowed in -noFork: override system property JAVA_HOME for calling java for the forked process");
- options.addOption("forkedStatusFile", true,
- "Not allowed in -noFork: temporary file used as to communicate " +
- "with forking process -- do not use this! Should only be invoked by forking process.");
- options.addOption("tmpFilePrefix", true,
- "Not allowed in -noFork: prefix for temp file - for debugging only");
- options.addOption("numRestarts", true,
- "Not allowed in -noFork: number of times that the forked server has had to be restarted.");
+ options.addOption("c", "config", true, "tika-config file");
+
+ options.addOption("i", "id", true, "id to use for server in" +
+ " the server status endpoint and logging");
+
return options;
}
@@ -131,14 +78,13 @@ public class TikaServerCli {
CommandLineParser cliParser = new DefaultParser();
- CommandLine line = cliParser.parse(options, stripForkedArgs(args));
- String[] newArgs = addDefaults(line, args);
- line = cliParser.parse(options, stripForkedArgs(newArgs));
- if (line.hasOption("noFork")) {
- noFork(line, newArgs);
+ CommandLine line = cliParser.parse(options, args);
+ TikaServerConfig tikaServerConfig = TikaServerConfig.load(line);
+ if (tikaServerConfig.isNoFork()) {
+ noFork(tikaServerConfig);
} else {
try {
- mainLoop(line, newArgs);
+ mainLoop(tikaServerConfig);
} catch (InterruptedException e) {
e.printStackTrace();
//swallow
@@ -146,23 +92,15 @@ public class TikaServerCli {
}
}
- private static void mainLoop(CommandLine line, String[] origArgs) throws Exception {
-
- List<String> argList = new ArrayList<>();
- argList.addAll(Arrays.asList(origArgs));
-
- NonForkedValues nonForkedValues = extractNonForkedValues(argList);
- int maxRestarts = nonForkedValues.maxRestarts;
- List<PortIdPair> portIdPairs = getPortIdPairs(nonForkedValues.id, nonForkedValues.portString);
+ private static void mainLoop(TikaServerConfig tikaServerConfig) throws Exception {
- String[] args = argList.toArray(new String[0]);
+ List<PortIdPair> portIdPairs = getPortIdPairs(tikaServerConfig);
ExecutorService executorService = Executors.newFixedThreadPool(portIdPairs.size());
ExecutorCompletionService<WatchDogResult> executorCompletionService = new ExecutorCompletionService<>(executorService);
- ServerTimeoutConfig serverTimeoutConfig = configureServerTimeouts(line);
for (PortIdPair p : portIdPairs) {
executorCompletionService.submit(
- new TikaServerWatchDog(args, p.port, p.id,0, serverTimeoutConfig));
+ new TikaServerWatchDog(p.port, p.id,0, tikaServerConfig));
}
int finished = 0;
@@ -173,16 +111,17 @@ public class TikaServerCli {
LOG.debug("main loop future is available");
WatchDogResult result = future.get();
LOG.debug("main loop future: ({}); about to restart", result);
- if (maxRestarts < 0 || result.getNumRestarts() < maxRestarts) {
+ if (tikaServerConfig.getMaxRestarts() < 0 ||
+ result.getNumRestarts() < tikaServerConfig.getMaxRestarts()) {
System.err.println("starting up again");
executorCompletionService.submit(
- new TikaServerWatchDog(args, result.getPort(),
+ new TikaServerWatchDog(result.getPort(),
result.getId(),
- result.getNumRestarts(), serverTimeoutConfig));
+ result.getNumRestarts(), tikaServerConfig));
} else {
System.err.println("finished!");
LOG.warn("id {} with port {} has exceeded maxRestarts {}. Shutting down and not restarting.",
- result.getId(), result.getPort(), maxRestarts);
+ result.getId(), result.getPort(), tikaServerConfig.getMaxRestarts());
finished++;
}
}
@@ -203,145 +142,38 @@ public class TikaServerCli {
return ret.toArray(new String[0]);
}
- //removes and records values that either shouldn't go into the forked
- //process or need to be modified
- private static NonForkedValues extractNonForkedValues(List<String> args) {
- int idIndex = -1;
- int portIndex = -1;
- int maxRestartIndex = -1;
- NonForkedValues nonForked = new NonForkedValues();
-
- for (int i = 0; i < args.size()-1; i++) {
- if (args.get(i).equals("-i") || args.get(i).equals("--id")) {
- idIndex = i;
- nonForked.id = args.get(i+1);
- } else if (args.get(i).equals("-p") ||
- args.get(i).equals("--port") || args.get(i).equals("--ports")) {
- portIndex = i;
- nonForked.portString = args.get(i+1);
- } else if (args.get(i).equals("-maxRestarts")
- || args.get(i).equals("--maxRestarts")) {
- maxRestartIndex = i;
- nonForked.maxRestarts = Integer.parseInt(args.get(i+1));
- }
- }
-
-
- //now remove -i and -p and their values from args
- List<String> copy = new ArrayList<>();
- copy.addAll(args);
- args.clear();
- for(int i = 0; i < copy.size(); i++) {
- if (i == idIndex || i == portIndex || i == maxRestartIndex) {
- i++;
- continue;
- }
- args.add(copy.get(i));
- }
-
- return nonForked;
- }
-
- public static void noFork(CommandLine line, String[] args) {
- //make sure the user didn't misunderstand the options
- for (String forkedOnly : ONLY_IN_FORK_MODE) {
- if (line.hasOption(forkedOnly)) {
- System.err.println("The option '" + forkedOnly +
- "' can't be used with '-noFork'");
- usage(getOptions());
- }
- }
- if (line.hasOption("p")) {
- String val = line.getOptionValue("p");
- try {
- Integer.parseInt(val);
- } catch (NumberFormatException e) {
- System.err.println("-p must be a single integer in -noFork mode. I see: "+val);
- usage(getOptions());
- }
- }
- TikaServerProcess.main(args);
+ public static void noFork(TikaServerConfig tikaServerConfig) {
+ List<String> args = tikaServerConfig.getForkedProcessArgs(
+ tikaServerConfig.getPort(), tikaServerConfig.getIdBase());
+ TikaServerProcess.main(args.toArray(new String[0]));
}
- private static String[] addDefaults(CommandLine line, String[] args) {
- List<String> newArr = new ArrayList<>(Arrays.asList(args));
- if (! line.hasOption("p")) {
- newArr.add("-p");
- newArr.add(Integer.toString(DEFAULT_PORT));
- }
- if (! line.hasOption("h")) {
- newArr.add("-h");
- newArr.add(DEFAULT_HOST);
- }
-
- if (! line.hasOption("i")) {
- newArr.add("-i");
- newArr.add(UUID.randomUUID().toString());
- }
- return newArr.toArray(new String[0]);
+ private static void usage(Options options) {
+ HelpFormatter helpFormatter = new HelpFormatter();
+ helpFormatter.printHelp("tikaserver", options);
+ System.exit(-1);
}
- private static List<PortIdPair> getPortIdPairs(String idString, String portsArg) {
+ private static List<PortIdPair> getPortIdPairs(TikaServerConfig tikaServerConfig) {
List<PortIdPair> pairs = new ArrayList<>();
Matcher m = Pattern.compile("^(\\d+)-(\\d+)\\Z").matcher("");
- for (String val : portsArg.split(",")) {
+ for (String val : tikaServerConfig.getPortString().split(",")) {
m.reset(val);
if (m.find()) {
int min = Math.min(Integer.parseInt(m.group(1)), Integer.parseInt(m.group(2)));
int max = Math.max(Integer.parseInt(m.group(1)), Integer.parseInt(m.group(2)));
for (int i = min; i <= max; i++) {
- pairs.add(new PortIdPair(i, idString+"-"+i));
+ pairs.add(new PortIdPair(i, tikaServerConfig.getIdBase() + "-" + i));
}
} else {
- pairs.add(new PortIdPair(Integer.parseInt(val), idString+"-"+val));
+ pairs.add(new PortIdPair(Integer.parseInt(val),
+ tikaServerConfig.getIdBase() + "-"+val));
}
}
return pairs;
}
- private static void usage(Options options) {
- HelpFormatter helpFormatter = new HelpFormatter();
- helpFormatter.printHelp("tikaserver", options);
- System.exit(-1);
- }
-
- private static ServerTimeoutConfig configureServerTimeouts(CommandLine line) {
- ServerTimeoutConfig serverTimeouts = new ServerTimeoutConfig();
- /*TODO -- add these in
- if (line.hasOption("forkedProcessStartupMillis")) {
- serverTimeouts.setForkedProcessStartupMillis(
- Long.parseLong(line.getOptionValue("forkedProcessStartupMillis")));
- }
- if (line.hasOption("forkedProcessShutdownMillis")) {
- serverTimeouts.setForkedProcessShutdownMillis(
- Long.parseLong(line.getOptionValue("forkedProcesShutdownMillis")));
- }*/
- if (line.hasOption("taskTimeoutMillis")) {
- serverTimeouts.setTaskTimeoutMillis(
- Long.parseLong(line.getOptionValue("taskTimeoutMillis")));
- }
- if (line.hasOption("pingTimeoutMillis")) {
- serverTimeouts.setPingTimeoutMillis(
- Long.parseLong(line.getOptionValue("pingTimeoutMillis")));
- }
- if (line.hasOption("pingPulseMillis")) {
- serverTimeouts.setPingPulseMillis(
- Long.parseLong(line.getOptionValue("pingPulseMillis")));
- }
-
- if (line.hasOption("maxRestarts")) {
- serverTimeouts.setMaxRestarts(Integer.parseInt(line.getOptionValue("maxRestarts")));
- }
-
- if (line.hasOption("maxForkedStartupMillis")) {
- serverTimeouts.setMaxForkedStartupMillis(
- Long.parseLong(line.getOptionValue("maxForkedStartupMillis")));
- }
-
- return serverTimeouts;
- }
-
private static class PortIdPair {
int port;
String id;
diff --git a/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/TikaServerConfig.java b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/TikaServerConfig.java
new file mode 100644
index 0000000..96fd0ef
--- /dev/null
+++ b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/TikaServerConfig.java
@@ -0,0 +1,552 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.server.core;
+
+import org.apache.commons.cli.CommandLine;
+import org.apache.tika.Tika;
+import org.apache.tika.exception.TikaConfigException;
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.utils.ProcessUtils;
+import org.apache.tika.utils.StringUtils;
+import org.apache.tika.utils.XMLReaderUtils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.w3c.dom.Element;
+import org.w3c.dom.Node;
+import org.w3c.dom.NodeList;
+import org.xml.sax.SAXException;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.lang.reflect.InvocationTargetException;
+import java.lang.reflect.Method;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Locale;
+import java.util.Set;
+import java.util.UUID;
+
+public class TikaServerConfig {
+
+ //used in fork mode -- restart after processing this many files
+ private static final long DEFAULT_MAX_FILES = 100000;
+
+
+ public static final int DEFAULT_PORT = 9998;
+ private static final int DEFAULT_DIGEST_MARK_LIMIT = 20*1024*1024;
+ public static final String DEFAULT_HOST = "localhost";
+ public static final Set<String> LOG_LEVELS = new HashSet<>(Arrays.asList("debug", "info"));
+
+ private static final String UNSECURE_WARNING =
+ "WARNING: You have chosen to run tika-server with unsecure features enabled.\n"+
+ "Whoever has access to your service now has the same read permissions\n"+
+ "as you've given your fetchers and the same write permissions as your emitters.\n" +
+ "Users could request and receive a sensitive file from your\n" +
+ "drive or a webpage from your intranet and/or send malicious content to\n" +
+ " your emitter endpoints. See CVE-2015-3271.\n"+
+ "Please make sure you know what you are doing.";
+
+ private static final List<String> ONLY_IN_FORK_MODE =
+ Arrays.asList(new String[] { "taskTimeoutMillis", "taskPulseMillis",
+ "pingTimeoutMillis", "pingPulseMillis", "maxFiles", "javaHome", "maxRestarts",
+ "numRestarts",
+ "forkedStatusFile", "maxForkedStartupMillis", "tmpFilePrefix"});
+
+ /**
+ * Config with only the defaults
+ */
+ public static TikaServerConfig load() { // builds a fresh instance; no file or command line is read here
+ return new TikaServerConfig();
+ }
+
+ /**
+  * Builds the server configuration from the command line.
+  * <p>
+  * If {@code -c} is present, the named file is loaded first and its path is
+  * recorded on the config; otherwise a new config object is created.
+  * The options {@code -p}, {@code -h}, {@code -i}, {@code numRestarts} and
+  * {@code forkedStatusFile} then overwrite whatever was loaded.
+  *
+  * @param commandLine the parsed command line
+  * @return the resulting configuration, after {@code validateConsistency()} has run
+  * @throws IOException if the config file cannot be read
+  * @throws TikaException if the config file is invalid
+  */
+ public static TikaServerConfig load(CommandLine commandLine) throws IOException, TikaException {
+
+     final TikaServerConfig config;
+     if (commandLine.hasOption("c")) {
+         String configFile = commandLine.getOptionValue("c");
+         config = load(Paths.get(configFile));
+         config.setConfigPath(configFile);
+     } else {
+         config = new TikaServerConfig();
+     }
+
+     //overwrite with the commandline
+     if (commandLine.hasOption("p")) {
+         String portValue = commandLine.getOptionValue("p");
+         try {
+             config.setPort(Integer.parseInt(portValue));
+         } catch (NumberFormatException ignored) {
+             //not a single integer (the option also accepts ranges and
+             //comma delimited lists); only the string form is recorded
+         }
+         //the raw string is always kept, whether or not it parsed as one port
+         config.setPortString(portValue);
+     }
+     if (commandLine.hasOption("h")) {
+         config.setHost(commandLine.getOptionValue("h"));
+     }
+
+     if (commandLine.hasOption("i")) {
+         config.setId(commandLine.getOptionValue("i"));
+     }
+
+     if (commandLine.hasOption("numRestarts")) {
+         config.setNumRestarts(Integer.parseInt(commandLine.getOptionValue("numRestarts")));
+     }
+
+     if (commandLine.hasOption("forkedStatusFile")) {
+         config.setForkedStatusFile(commandLine.getOptionValue("forkedStatusFile"));
+     }
+     config.validateConsistency();
+     return config;
+ }
+
+ private void setPortString(String portString) { // raw port specification; stored as given, nothing is parsed here
+ this.portString = portString;
+ }
+
+ private void setId(String id) { // the value is kept in the idBase field, not in a field named id
+ this.idBase = id;
+ }
+
+ public static TikaServerConfig load (Path p) throws IOException, TikaException { // opens the file and delegates to load(InputStream)
+ try (InputStream is = Files.newInputStream(p)) { // try-with-resources closes the stream once loading returns or throws
+ return TikaServerConfig.load(is);
+ }
+ }
+
+ /**
+  * Reads a configuration from an XML stream.
+  * <p>
+  * The document element must have the local name {@code properties}. Each
+  * direct child with the local name {@code server} is applied to the
+  * resulting config; all other children are ignored here.
+  *
+  * @param is stream containing the XML; it is not closed by this method
+  * @return the loaded configuration, after {@code validateConsistency()} has run
+  * @throws IOException if the XML cannot be parsed (wraps the {@link SAXException})
+  * @throws TikaException if the root element is not {@code properties} or a
+  *                       server parameter cannot be applied
+  */
+ public static TikaServerConfig load(InputStream is) throws IOException, TikaException {
+     Node properties = null;
+     try {
+         properties = XMLReaderUtils.buildDOM(is).getDocumentElement();
+     } catch (SAXException e) {
+         throw new IOException(e);
+     }
+     //constant-first comparison: getLocalName() may be null, and the message
+     //now names the element that is actually required
+     String rootName = properties.getLocalName();
+     if (! "properties".equals(rootName)) {
+         throw new TikaConfigException(
+                 "expected 'properties' as root node, but found '" + rootName + "'");
+     }
+     NodeList children = properties.getChildNodes();
+     TikaServerConfig config = new TikaServerConfig();
+     for (int i = 0; i < children.getLength(); i++) {
+         Node child = children.item(i);
+         if ("server".equals(child.getLocalName())) {
+             loadServerConfig(child, config);
+         }
+     }
+     config.validateConsistency();
+     return config;
+ }
+
+ /**
+  * Applies the children of a {@code server} element to the given config.
+  * <p>
+  * {@code endpoints} and {@code forkedJVMArgs} are read as string lists,
+  * {@code port} is stored as the port string, and every other child that has
+  * both a local name and text content is handed to {@code tryToSet}.
+  *
+  * @param server the {@code server} element
+  * @param config the config to populate
+  * @throws TikaConfigException if a parameter cannot be applied
+  */
+ private static void loadServerConfig(Node server, TikaServerConfig config)
+         throws TikaConfigException {
+     NodeList children = server.getChildNodes();
+     for (int idx = 0; idx < children.getLength(); idx++) {
+         Node child = children.item(idx);
+         String name = child.getLocalName();
+         String text = child.getTextContent();
+         if ("endpoints".equals(name)) {
+             config.addEndPoints(loadStringList("endpoint", child.getChildNodes()));
+             continue;
+         }
+         if ("forkedJVMArgs".equals(name)) {
+             config.addJVMArgs(loadStringList("arg", child.getChildNodes()));
+             continue;
+         }
+         if (name == null || text == null) {
+             //nothing to set from a node without a local name or text
+             continue;
+         }
+         if ("port".equals(name)) {
+             config.setPortString(text);
+         } else {
+             tryToSet(config, name, text);
+         }
+     }
+ }
+
+ /**
+  * Sets a single parameter on the config via reflection.
+  * <p>
+  * The setter name is {@code "set"} plus the local name with its first
+  * character upper-cased. Public one-argument overloads are tried in the
+  * order {@code String}, {@code boolean}, {@code int}, {@code long}; the
+  * first one that exists is invoked with the converted text.
+  *
+  * @param config the config to modify
+  * @param localName the XML element name of the parameter
+  * @param txt the text content of the element
+  * @throws TikaConfigException if no matching setter exists, if the text is
+  *         not a valid number for a numeric setter, or if the setter fails
+  */
+ private static void tryToSet(TikaServerConfig config, String localName, String txt) throws TikaConfigException {
+     String setter = "set" + localName.substring(0, 1).toUpperCase(Locale.US) + localName.substring(1);
+     Class<?>[] types = new Class<?>[]{String.class, boolean.class, int.class, long.class};
+     for (Class<?> t : types) {
+         Method m;
+         try {
+             m = TikaServerConfig.class.getMethod(setter, t);
+         } catch (NoSuchMethodException e) {
+             //no overload for this parameter type; try the next candidate
+             continue;
+         }
+         Object value;
+         try {
+             if (t == int.class) {
+                 value = Integer.parseInt(txt);
+             } else if (t == long.class) {
+                 value = Long.parseLong(txt);
+             } else if (t == boolean.class) {
+                 value = Boolean.parseBoolean(txt);
+             } else {
+                 value = txt;
+             }
+         } catch (NumberFormatException e) {
+             //report a malformed number as a config problem instead of
+             //letting the unchecked exception escape
+             throw new TikaConfigException(
+                     "bad value '" + txt + "' for parameter " + setter, e);
+         }
+         try {
+             m.invoke(config, value);
+             return;
+         } catch (IllegalAccessException | InvocationTargetException e) {
+             throw new TikaConfigException("bad parameter " + setter, e);
+         }
+     }
+     throw new TikaConfigException("Couldn't find setter: " + setter);
+ }
+
+ /**
+  * Collects the text content of every node in the list whose local name
+  * equals {@code itemName}, in list order.
+  *
+  * @param itemName local name of the elements to collect
+  * @param nodelist nodes to scan; only the nodes in this list are examined
+  * @return a new list with the matching nodes' text content, possibly empty
+  */
+ private static List<String> loadStringList(String itemName, NodeList nodelist) {
+     List<String> values = new ArrayList<>();
+     for (int idx = 0; idx < nodelist.getLength(); idx++) {
+         Node candidate = nodelist.item(idx);
+         if (! itemName.equals(candidate.getLocalName())) {
+             continue;
+         }
+         values.add(candidate.getTextContent());
+     }
+     return values;
+ }
+
+ /*
+ TODO: integrate these settings:
+ * Number of milliseconds to wait to start forked process.
+ public static final long DEFAULT_FORKED_PROCESS_STARTUP_MILLIS = 60000;
+
+ * Maximum number of milliseconds to wait to shutdown forked process to allow
+ * for current parses to complete.
+ public static final long DEFAULT_FORKED_PROCESS_SHUTDOWN_MILLIS = 30000;
+
+ private long forkedProcessStartupMillis = DEFAULT_FORKED_PROCESS_STARTUP_MILLIS;
+
+ private long forkedProcessShutdownMillis = DEFAULT_FORKED_PROCESS_SHUTDOWN_MILLIS;
+
+ */
+
+
+
+ /**
+ * If the forked process doesn't receive a ping or the parent doesn't
+ * hear back from a ping in this amount of time, terminate and restart the forked process.
+ */
+ public static final long DEFAULT_PING_TIMEOUT_MILLIS = 30000;
+
+ /**
+ * How often should the parent try to ping the forked process to check status
+ */
+ public static final long DEFAULT_PING_PULSE_MILLIS = 500;
+
+ /**
+ * Number of milliseconds to wait per server task (parse, detect, unpack, translate,
+ * etc.) before timing out and shutting down the forked process.
+ */
+ public static final long DEFAULT_TASK_TIMEOUT_MILLIS = 120000;
+
+ /**
+ * Number of milliseconds to wait for the forked process to start up
+ */
+ public static final long DEFAULT_FORKED_STARTUP_MILLIS = 120000;
+
+ //NOTE(review): -1 presumably means "no limit on restarts" -- confirm against the code that reads it
+ private int maxRestarts = -1;
+ //number of files after which the forked server restarts
+ private long maxFiles = 100000;
+ private long taskTimeoutMillis = DEFAULT_TASK_TIMEOUT_MILLIS;
+ private long pingTimeoutMillis = DEFAULT_PING_TIMEOUT_MILLIS;
+ private long pingPulseMillis = DEFAULT_PING_PULSE_MILLIS;
+ private long maxforkedStartupMillis = DEFAULT_FORKED_STARTUP_MILLIS;
+ //when true, the server also sets up the fetcher/emitter based resources
+ private boolean enableUnsecureFeatures = false;
+ //origin allowed for CORS requests, "*" for all; blank means no CORS filter is installed
+ private String cors = "";
+ private boolean returnStackTrace = false;
+ private boolean noFork = false;
+ private String tempFilePrefix = "tika-server-tmp-"; //can be set for debugging
+ //extra jvm arguments used when starting the forked process
+ private List<String> forkedJvmArgs = new ArrayList<>();
+ //random per-instance id; currently also returned by getId()
+ private String idBase = UUID.randomUUID().toString();
+ //port setting as a string; validateConsistency() copies it into port when it parses as an int
+ private String portString = Integer.toString(DEFAULT_PORT);
+ private int port = DEFAULT_PORT;
+ private String host = DEFAULT_HOST;
+
+ private int digestMarkLimit = DEFAULT_DIGEST_MARK_LIMIT;
+ //digest configuration string, e.g. md5 or sha256; blank means no digester is created
+ private String digest = "";
+ //debug or info only
+ private String logLevel = "";
+ //null unless a config file was specified, see hasConfigFile()
+ private Path configPath;
+ //names of the end points to load; if empty, all of the core end points are loaded
+ private List<String> endPoints = new ArrayList<>();
+
+ //these should only be set in the forked process
+ //and they are automatically set by the forking process
+ private String forkedStatusFile;
+ private int numRestarts = 0;
+
+ /**
+ * @return true if the server should be started without forking a process
+ */
+ public boolean isNoFork() {
+ return noFork;
+ }
+
+ /**
+ * @return the port setting in its string form; defaults to the string form of DEFAULT_PORT
+ */
+ public String getPortString() {
+ return portString;
+ }
+
+ /**
+ * @return the port as an int; defaults to DEFAULT_PORT
+ */
+ public int getPort() {
+ return port;
+ }
+
+ /**
+ * @param port the port as an int; note that this does not update the value returned by getPortString()
+ */
+ public void setPort(int port) {
+ this.port = port;
+ }
+ /**
+ * How long to wait for a task before shutting down the forked server process
+ * and restarting it.
+ * @return the task timeout in milliseconds; defaults to DEFAULT_TASK_TIMEOUT_MILLIS
+ */
+ public long getTaskTimeoutMillis() {
+ return taskTimeoutMillis;
+ }
+
+ /**
+ * Sets how long a single task may run.
+ * @param taskTimeoutMillis number of milliseconds to allow per task
+ * (parse, detection, unzipping, etc.)
+ */
+ public void setTaskTimeoutMillis(long taskTimeoutMillis) {
+ this.taskTimeoutMillis = taskTimeoutMillis;
+ }
+
+ /**
+ * @return the ping timeout in milliseconds; defaults to DEFAULT_PING_TIMEOUT_MILLIS
+ */
+ public long getPingTimeoutMillis() {
+ return pingTimeoutMillis;
+ }
+
+ /**
+ * Sets the ping timeout.
+ * @param pingTimeoutMillis if the parent doesn't receive a response
+ * in this amount of time, or
+ * if the forked doesn't receive a ping
+ * in this amount of time, restart the forked process
+ */
+ public void setPingTimeoutMillis(long pingTimeoutMillis) {
+ this.pingTimeoutMillis = pingTimeoutMillis;
+ }
+
+ /**
+ * @return how often, in milliseconds, to ping; defaults to DEFAULT_PING_PULSE_MILLIS
+ */
+ public long getPingPulseMillis() {
+ return pingPulseMillis;
+ }
+
+ /**
+ * Sets the interval between pings.
+ * @param pingPulseMillis how often to test that the parent and/or forked is alive
+ */
+ public void setPingPulseMillis(long pingPulseMillis) {
+ this.pingPulseMillis = pingPulseMillis;
+ }
+
+ /**
+ * @return the configured maximum number of restarts; the default is -1
+ * (NOTE(review): presumably "no limit" -- confirm)
+ */
+ public int getMaxRestarts() {
+ return maxRestarts;
+ }
+
+ /**
+ * @param maxRestarts maximum number of restarts (NOTE(review): presumably of the forked process -- confirm)
+ */
+ public void setMaxRestarts(int maxRestarts) {
+ this.maxRestarts = maxRestarts;
+ }
+
+ /**
+  * Sets the host name; the wildcard {@code "*"} is stored as {@code "0.0.0.0"}.
+  *
+  * @param host host name, or {@code "*"} for all
+  */
+ public void setHost(String host) {
+     this.host = "*".equals(host) ? "0.0.0.0" : host;
+ }
+
+ /**
+ * Maximum time in millis to allow for the forked process to startup
+ * or restart
+ * @return the startup limit in milliseconds; defaults to DEFAULT_FORKED_STARTUP_MILLIS
+ */
+ public long getMaxForkedStartupMillis() {
+ return maxforkedStartupMillis;
+ }
+
+ /**
+ * @param maxForkedStartupMillis maximum time in millis to allow for the forked process to start up
+ */
+ public void setMaxForkedStartupMillis(long maxForkedStartupMillis) {
+ this.maxforkedStartupMillis = maxForkedStartupMillis;
+ }
+
+ public List<String> getForkedProcessArgs(int port, String id) {
+ //these are the arguments for the forked process
+ List<String> args = new ArrayList<>();
+ args.add("-p");
+ args.add(Integer.toString(port));
+ args.add("-i");
+ args.add(id);
+ if (hasConfigFile()) {
+ args.add("-c");
+ args.add(
+ ProcessUtils.escapeCommandLine(
+ configPath.toAbsolutePath().toString()));
+ }
+ return args;
+ }
+
+ /**
+ * @return the base id of this config; initialized to a random UUID string
+ */
+ public String getIdBase() {
+ return idBase;
+ }
+
+ /**
+ * Command used to start java.
+ * Currently this is not configurable and is always the bare command "java",
+ * not a full path.
+ * NOTE(review): presumably resolved via the PATH of the calling process -- confirm
+ * @return the string "java"
+ */
+ public String getJavaPath() {
+ return "java";
+ }
+
+ /**
+ * @return the jvm arguments for the forked process; this is the internal list, not a copy
+ */
+ public List<String> getForkedJvmArgs() {
+ return forkedJvmArgs;
+ }
+
+ /**
+ * @return prefix for temp file names; defaults to "tika-server-tmp-"
+ */
+ public String getTempFilePrefix() {
+ return tempFilePrefix;
+ }
+
+ /**
+ * @return true if the unsecure features have been enabled; false by default
+ */
+ public boolean isEnableUnsecureFeatures() {
+ return enableUnsecureFeatures;
+ }
+
+ /**
+ * Checks the settings: host must not be null, and a non-blank portString
+ * is copied into port when it parses as an int.
+ * @throws TikaConfigException if host is null
+ */
+ private void validateConsistency() throws TikaConfigException {
+ if (host == null) {
+ throw new TikaConfigException("Must specify 'host'");
+ }
+ if (!StringUtils.isBlank(portString)) {
+ try {
+ setPort(Integer.parseInt(portString));
+ } catch (NumberFormatException e) {
+ //NOTE(review): a portString that is not a plain int is silently ignored
+ //and port keeps its current value; presumably portString may hold
+ //something other than a single port -- confirm, or report the bad value
+
+ }
+ }
+ }
+
+ /**
+ * @return the host; "*" passed to setHost is returned as "0.0.0.0"
+ */
+ public String getHost() {
+ return host;
+ }
+
+ /**
+  * Sets the request URI log level.
+  *
+  * @param level must be exactly {@code "debug"} or {@code "info"}
+  * @throws TikaConfigException if {@code level} is null or any other value
+  */
+ public void setLogLevel(String level) throws TikaConfigException {
+     // constant-first comparison: a null level is rejected with the declared
+     // TikaConfigException instead of a NullPointerException
+     if ("debug".equals(level) || "info".equals(level)) {
+         this.logLevel = level;
+         return;
+     }
+     throw new TikaConfigException("log level must be one of: 'debug' or 'info'");
+ }
+ /**
+ * @return "debug" or "info" if set via setLogLevel; the empty string by default
+ */
+ public String getLogLevel() {
+ return logLevel;
+ }
+
+ /**
+ * Origin allowed to make CORS requests.
+ * @return the origin url for cors, can be "*"; the empty string by default
+ */
+ public String getCors() {
+ return cors;
+ }
+
+ /**
+ * @return true if a config path has been set
+ */
+ public boolean hasConfigFile() {
+ return configPath != null;
+ }
+
+ /**
+ * @param path path to the config file; converted to a Path with Paths.get, so it must not be null
+ */
+ public void setConfigPath(String path) {
+ this.configPath = Paths.get(path);
+ }
+
+ /**
+ * @return the config file path, or null if none has been set (see hasConfigFile())
+ */
+ public Path getConfigPath() {
+ return configPath;
+ }
+
+ /**
+ * @return the mark limit passed to the digester; defaults to DEFAULT_DIGEST_MARK_LIMIT
+ */
+ public int getDigestMarkLimit() {
+ return digestMarkLimit;
+ }
+
+ /**
+ * digest configuration string, e.g. md5 or sha256, alternately w 16 or 32 encoding,
+ * e.g. md5:32,sha256:16 would result in two digests per file
+ * @return the digest configuration string; the empty string by default
+ */
+ public String getDigest() {
+ return digest;
+ }
+
+
+ /**
+ * maximum number of files before the forked server restarts.
+ * This is useful for avoiding any slow-building memory leaks/bloat.
+ * @return the maximum number of files; 100000 by default
+ */
+ public long getMaxFiles() {
+ return maxFiles;
+ }
+
+ /**
+ * @param maxFiles maximum number of files before the forked server restarts
+ */
+ public void setMaxFiles(long maxFiles) {
+ this.maxFiles = maxFiles;
+ }
+
+ /**
+ * @return the returnStackTrace flag; false by default
+ */
+ public boolean isReturnStackTrace() {
+ return returnStackTrace;
+ }
+
+ /**
+ * @param returnStackTrace NOTE(review): presumably whether to return a stack trace
+ * to the client when there is an exception -- confirm
+ */
+ public void setReturnStackTrace(boolean returnStackTrace) {
+ this.returnStackTrace = returnStackTrace;
+ }
+
+ /**
+ * @return the names of the configured end points; empty by default.
+ * This is the internal list, not a copy.
+ */
+ public List<String> getEndPoints() {
+ return endPoints;
+ }
+
+ /**
+ * @return the server id; for now this is the same value as getIdBase()
+ */
+ public String getId() {
+ //TODO fix this
+ return idBase;
+ }
+
+ /**
+ * Appends the given end point names to the configured end points.
+ */
+ private void addEndPoints(List<String> endPoints) {
+ this.endPoints.addAll(endPoints);
+ }
+
+ /**
+ * Appends the given arguments to the jvm arguments for the forked process.
+ */
+ private void addJVMArgs(List<String> args) {
+ this.forkedJvmArgs.addAll(args);
+ }
+
+ /**
+ * @param enableUnsecureFeatures true to enable the unsecure features
+ */
+ public void setEnableUnsecureFeatures(boolean enableUnsecureFeatures) {
+ this.enableUnsecureFeatures = enableUnsecureFeatures;
+ }
+
+ /******
+ * these should only be used in the commandline for a forked process
+ ******/
+
+
+ /**
+ * @param numRestarts restart count for the forked process
+ */
+ private void setNumRestarts(int numRestarts) {
+ this.numRestarts = numRestarts;
+ }
+
+ /**
+ * @return the restart count; 0 by default
+ */
+ public int getNumRestarts() {
+ return numRestarts;
+ }
+
+ /**
+ * @return path of the forked status file as a string; null unless it has been set
+ */
+ public String getForkedStatusFile() {
+ return forkedStatusFile;
+ }
+
+ /**
+ * @param forkedStatusFile path of the forked status file as a string
+ */
+ private void setForkedStatusFile(String forkedStatusFile) {
+ this.forkedStatusFile = forkedStatusFile;
+ }
+
+}
diff --git a/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/TikaServerProcess.java b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/TikaServerProcess.java
index 5135f55..69b561f 100644
--- a/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/TikaServerProcess.java
+++ b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/TikaServerProcess.java
@@ -64,6 +64,7 @@ import org.apache.tika.server.core.writer.MetadataListMessageBodyWriter;
import org.apache.tika.server.core.writer.TarWriter;
import org.apache.tika.server.core.writer.TextMessageBodyWriter;
import org.apache.tika.server.core.writer.ZipWriter;
+import org.apache.tika.utils.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -100,34 +101,11 @@ public class TikaServerProcess {
private static Options getOptions() {
Options options = new Options();
- options.addOption("C", "cors", true, "origin allowed to make CORS requests (default=NONE)\nall allowed if \"all\"");
options.addOption("h", "host", true, "host name, use * for all)");
options.addOption("p", "port", true, "listen port");
options.addOption("c", "config", true, "Tika Configuration file to override default config with.");
- options.addOption("d", "digest", true, "include digest in metadata, e.g. md5,sha1:32,sha256");
- options.addOption("dml", "digestMarkLimit", true, "max number of bytes to mark on stream for digest");
- options.addOption("l", "log", true, "request URI log level ('debug' or 'info')");
- options.addOption("s", "includeStack", false, "whether or not to return a stack trace\nif there is an exception during 'parse'");
options.addOption("i", "id", true, "id to use for server in server status endpoint");
- options.addOption("status", false, "enable the status endpoint");
options.addOption("?", "help", false, "this help message");
- options.addOption("enableUnsecureFeatures", false, "this is required to enable fetchers and emitters. " +
- " The user acknowledges that fetchers and emitters introduce potential security vulnerabilities.");
- options.addOption("noFork", false, "legacy mode, less robust -- this starts up tika-server" +
- " without forking a process.");
- options.addOption("taskTimeoutMillis", true,
- "Not allowed in -noFork: how long to wait for a task (e.g. parse) to finish");
- options.addOption("taskPulseMillis", true,
- "Not allowed in -noFork: how often to check if a task has timed out.");
- options.addOption("pingTimeoutMillis", true,
- "Not allowed in -noFork: how long to wait to wait for a ping and/or ping response.");
- options.addOption("pingPulseMillis", true,
- "Not allowed in -noFork: how often to check if a ping has timed out.");
- options.addOption("maxFiles", true,
- "Not allowed in -noFork: shutdown server after this many files (to handle parsers that might introduce " +
- "slowly building memory leaks); the default is " + DEFAULT_MAX_FILES + ". Set to -1 to turn this off.");
- options.addOption("javaHome", true,
- "Not allowed in -noFork: override system property JAVA_HOME for calling java for the forked process");
options.addOption("forkedStatusFile", true,
"Not allowed in -noFork: temporary file used to communicate " +
"with forking process -- do not use this! Should only be invoked by forking process.");
@@ -144,7 +122,9 @@ public class TikaServerProcess {
Options options = getOptions();
CommandLineParser cliParser = new DefaultParser();
CommandLine line = cliParser.parse(options, args);
- mainLoop(line, options);
+ TikaServerConfig tikaServerConfig = TikaServerConfig.load(line);
+
+ mainLoop(tikaServerConfig);
} catch (Exception e) {
e.printStackTrace();
LOG.error("Can't start: ", e);
@@ -152,18 +132,18 @@ public class TikaServerProcess {
}
}
- private static void mainLoop(CommandLine commandLine, Options options) throws Exception {
+ private static void mainLoop(TikaServerConfig tikaServerConfig) throws Exception {
AsyncResource asyncResource = null;
ArrayBlockingQueue<FetchEmitTuple> asyncFetchEmitQueue = null;
ArrayBlockingQueue<EmitData> asyncEmitData = null;
int numAsyncParserThreads = 10;
- if (commandLine.hasOption(ENABLE_UNSECURE_FEATURES)) {
+ if (tikaServerConfig.isEnableUnsecureFeatures()) {
asyncResource = new AsyncResource();
asyncFetchEmitQueue = asyncResource.getFetchEmitQueue(10000);
asyncEmitData = asyncResource.getEmitDataQueue(1000);
}
- ServerDetails serverDetails = initServer(commandLine, asyncResource);
+ ServerDetails serverDetails = initServer(tikaServerConfig, asyncResource);
ExecutorService executorService = Executors.newFixedThreadPool(numAsyncParserThreads+1);
ExecutorCompletionService<Integer> executorCompletionService = new ExecutorCompletionService<>(executorService);
@@ -188,79 +168,33 @@ public class TikaServerProcess {
}
//This returns the server, configured and ready to be started.
- private static ServerDetails initServer(CommandLine line,
+ private static ServerDetails initServer(TikaServerConfig tikaServerConfig,
AsyncResource asyncResource) throws Exception {
- String host = null;
-
- if (line.hasOption("host")) {
- host = line.getOptionValue("host");
- if ("*".equals(host)) {
- host = "0.0.0.0";
- }
- } else {
- throw new IllegalArgumentException("Must specify 'host'");
- }
-
- int port = -1;
-
- if (line.hasOption("port")) {
- port = Integer.valueOf(line.getOptionValue("port"));
- } else {
- throw new IllegalArgumentException("Must specify port");
- }
-
- boolean returnStackTrace = false;
- if (line.hasOption("includeStack")) {
- returnStackTrace = true;
- }
-
- TikaLoggingFilter logFilter = null;
- if (line.hasOption("log")) {
- String logLevel = line.getOptionValue("log");
- if (LOG_LEVELS.contains(logLevel)) {
- boolean isInfoLevel = "info".equals(logLevel);
- logFilter = new TikaLoggingFilter(isInfoLevel);
- } else {
- LOG.info("Unsupported request URI log level: {}", logLevel);
- }
- }
-
- CrossOriginResourceSharingFilter corsFilter = null;
- if (line.hasOption("cors")) {
- corsFilter = new CrossOriginResourceSharingFilter();
- String url = line.getOptionValue("cors");
- List<String> origins = new ArrayList<>();
- if (!url.equals("*")) origins.add(url); // Empty list allows all origins.
- corsFilter.setAllowOrigins(origins);
- }
+ String host = tikaServerConfig.getHost();
+ int port = tikaServerConfig.getPort();
// The Tika Configuration to use throughout
TikaConfig tika;
- if (line.hasOption("config")) {
- String configFilePath = line.getOptionValue("config");
- LOG.info("Using custom config: {}", configFilePath);
- tika = new TikaConfig(configFilePath);
+ if (tikaServerConfig.hasConfigFile()) {
+ LOG.info("Using custom config: {}",
+ tikaServerConfig.getConfigPath());
+ tika = new TikaConfig(tikaServerConfig.getConfigPath());
} else {
tika = TikaConfig.getDefaultConfig();
}
DigestingParser.Digester digester = null;
- if (line.hasOption("digest")) {
- int digestMarkLimit = DEFAULT_DIGEST_MARK_LIMIT;
- if (line.hasOption("dml")) {
- String dmlS = line.getOptionValue("dml");
- try {
- digestMarkLimit = Integer.parseInt(dmlS);
- } catch (NumberFormatException e) {
- throw new RuntimeException("Must have parseable int after digestMarkLimit(dml): " + dmlS);
- }
- }
+ if (! StringUtils.isBlank(tikaServerConfig.getDigest())) {
try {
- digester = new CommonsDigester(digestMarkLimit, line.getOptionValue("digest"));
+ digester = new CommonsDigester(
+ tikaServerConfig.getDigestMarkLimit(),
+ tikaServerConfig.getDigest());
} catch (IllegalArgumentException commonsException) {
try {
- digester = new BouncyCastleDigester(digestMarkLimit, line.getOptionValue("digest"));
+ digester = new BouncyCastleDigester(
+ tikaServerConfig.getDigestMarkLimit(),
+ tikaServerConfig.getDigest());
} catch (IllegalArgumentException bcException) {
throw new IllegalArgumentException("Tried both CommonsDigester (" + commonsException.getMessage() +
") and BouncyCastleDigester (" + bcException.getMessage() + ")", bcException);
@@ -269,83 +203,45 @@ public class TikaServerProcess {
}
InputStreamFactory inputStreamFactory = null;
- if (line.hasOption(ENABLE_UNSECURE_FEATURES)) {
+ if (tikaServerConfig.isEnableUnsecureFeatures()) {
inputStreamFactory = new FetcherStreamFactory(tika.getFetcherManager());
} else {
inputStreamFactory = new DefaultInputStreamFactory();
}
- logFetchersAndEmitters(line.hasOption(ENABLE_UNSECURE_FEATURES), tika);
- String serverId = line.hasOption("i") ? line.getOptionValue("i") : UUID.randomUUID().toString();
+ logFetchersAndEmitters(tikaServerConfig.isEnableUnsecureFeatures(), tika);
+
+ String serverId = tikaServerConfig.getId();
LOG.debug("SERVER ID:" + serverId);
ServerStatus serverStatus;
- if (line.hasOption("noFork")) {
+ if (tikaServerConfig.isNoFork()) {
serverStatus = new ServerStatus(serverId, 0, true);
} else {
- serverStatus = new ServerStatus(serverId, Integer.parseInt(line.getOptionValue("numRestarts")),
+ serverStatus = new ServerStatus(serverId,
+ tikaServerConfig.getNumRestarts(),
false);
//redirect!!!
InputStream in = System.in;
System.setIn(new ByteArrayInputStream(new byte[0]));
System.setOut(System.err);
- long maxFiles = DEFAULT_MAX_FILES;
- if (line.hasOption("maxFiles")) {
- maxFiles = Long.parseLong(line.getOptionValue("maxFiles"));
- }
-
- ServerTimeoutConfig serverTimeouts = configureServerTimeouts(line);
- String forkedStatusFile = line.getOptionValue("forkedStatusFile");
+ String forkedStatusFile = tikaServerConfig.getForkedStatusFile();
Thread serverThread =
new Thread(new ServerStatusWatcher(serverStatus, in,
- Paths.get(forkedStatusFile), maxFiles, serverTimeouts));
+ Paths.get(forkedStatusFile), tikaServerConfig));
serverThread.start();
}
TikaResource.init(tika, digester, inputStreamFactory, serverStatus);
JAXRSServerFactoryBean sf = new JAXRSServerFactoryBean();
- List<ResourceProvider> rCoreProviders = new ArrayList<>();
- rCoreProviders.add(new SingletonResourceProvider(new MetadataResource()));
- rCoreProviders.add(new SingletonResourceProvider(new RecursiveMetadataResource()));
- rCoreProviders.add(new SingletonResourceProvider(new DetectorResource(serverStatus)));
- rCoreProviders.add(new SingletonResourceProvider(new LanguageResource()));
- rCoreProviders.add(new SingletonResourceProvider(new TranslateResource(serverStatus)));
- rCoreProviders.add(new SingletonResourceProvider(new TikaResource()));
- rCoreProviders.add(new SingletonResourceProvider(new UnpackerResource()));
- rCoreProviders.add(new SingletonResourceProvider(new TikaMimeTypes()));
- rCoreProviders.add(new SingletonResourceProvider(new TikaDetectors()));
- rCoreProviders.add(new SingletonResourceProvider(new TikaParsers()));
- rCoreProviders.add(new SingletonResourceProvider(new TikaVersion()));
- if (line.hasOption(ENABLE_UNSECURE_FEATURES)) {
- rCoreProviders.add(new SingletonResourceProvider(new EmitterResource()));
- rCoreProviders.add(new SingletonResourceProvider(asyncResource));
- }
- rCoreProviders.addAll(loadResourceServices());
- if (line.hasOption("status")) {
- rCoreProviders.add(new SingletonResourceProvider(new TikaServerStatus(serverStatus)));
- }
- List<ResourceProvider> rAllProviders = new ArrayList<>(rCoreProviders);
- rAllProviders.add(new SingletonResourceProvider(new TikaWelcome(rCoreProviders)));
- sf.setResourceProviders(rAllProviders);
-
+ List<ResourceProvider> resourceProviders = new ArrayList<>();
List<Object> providers = new ArrayList<>();
- providers.add(new TarWriter());
- providers.add(new ZipWriter());
- providers.add(new CSVMessageBodyWriter());
- providers.add(new MetadataListMessageBodyWriter());
- providers.add(new JSONMessageBodyWriter());
- providers.add(new TextMessageBodyWriter());
- providers.addAll(loadWriterServices());
- providers.add(new TikaServerParseExceptionMapper(returnStackTrace));
- providers.add(new JSONObjWriter());
-
- if (logFilter != null) {
- providers.add(logFilter);
- }
- if (corsFilter != null) {
- providers.add(corsFilter);
- }
+ loadAllProviders(tikaServerConfig, asyncResource, serverStatus,
+ resourceProviders, providers);
+
+ sf.setResourceProviders(resourceProviders);
+
sf.setProviders(providers);
//set compression interceptors
@@ -368,6 +264,112 @@ public class TikaServerProcess {
return details;
}
+ /**
+  * Fills the two output lists that are handed to the server factory.
+  * <p>
+  * {@code resourceProviders} receives the core providers plus a
+  * {@code TikaWelcome} built from those core providers. {@code writers}
+  * receives the message body writers, the parse exception mapper and,
+  * depending on the config, the request logging filter and the CORS filter.
+  *
+  * @param tikaServerConfig server configuration
+  * @param asyncResource passed through to {@code loadCoreProviders}
+  * @param serverStatus passed through to {@code loadCoreProviders}
+  * @param resourceProviders output list for the resource providers
+  * @param writers output list for writers, mappers and filters
+  */
+ private static void loadAllProviders(TikaServerConfig tikaServerConfig,
+         AsyncResource asyncResource, ServerStatus serverStatus,
+         List<ResourceProvider> resourceProviders,
+         List<Object> writers) {
+     List<ResourceProvider> coreProviders =
+             loadCoreProviders(tikaServerConfig, asyncResource, serverStatus);
+     resourceProviders.addAll(coreProviders);
+     //the welcome page only lists the core providers
+     resourceProviders.add(new SingletonResourceProvider(new TikaWelcome(coreProviders)));
+
+     //for now, just load everything
+     writers.add(new TarWriter());
+     writers.add(new ZipWriter());
+     writers.add(new CSVMessageBodyWriter());
+     writers.add(new MetadataListMessageBodyWriter());
+     writers.add(new JSONMessageBodyWriter());
+     writers.add(new TextMessageBodyWriter());
+     writers.addAll(loadWriterServices());
+     writers.add(new TikaServerParseExceptionMapper(tikaServerConfig.isReturnStackTrace()));
+     writers.add(new JSONObjWriter());
+
+     String logLevel = tikaServerConfig.getLogLevel();
+     if (!StringUtils.isBlank(logLevel)) {
+         if (LOG_LEVELS.contains(logLevel)) {
+             writers.add(new TikaLoggingFilter("info".equals(logLevel)));
+         } else {
+             LOG.warn("Unsupported request URI log level: {}", logLevel);
+         }
+     }
+
+     String corsOrigin = tikaServerConfig.getCors();
+     if (!StringUtils.isBlank(corsOrigin)) {
+         List<String> origins = new ArrayList<>();
+         // Empty list allows all origins.
+         if (!"*".equals(corsOrigin)) {
+             origins.add(corsOrigin);
+         }
+         CrossOriginResourceSharingFilter corsFilter = new CrossOriginResourceSharingFilter();
+         corsFilter.setAllowOrigins(origins);
+         writers.add(corsFilter);
+     }
+ }
+
+ /**
+  * Builds the resource providers (REST end points) to register with the server.
+  * <p>
+  * When the config names no end points, all of the core resources are
+  * registered, plus the emit, async and status resources if unsecure
+  * features are enabled. Otherwise only the end points named in the config
+  * are registered; the emit and async end points are skipped with a warning
+  * unless unsecure features are enabled, and unknown names are skipped with
+  * a warning. In both cases the providers returned by
+  * {@code loadResourceServices()} are appended exactly once.
+  *
+  * @param tikaServerConfig server configuration
+  * @param asyncResource resource for the async end point; the caller only
+  *                      creates it when unsecure features are enabled, so
+  *                      it may be null
+  * @param serverStatus status object handed to the resources that need it
+  * @return the providers to register
+  */
+ private static List<ResourceProvider> loadCoreProviders(
+         TikaServerConfig tikaServerConfig, AsyncResource asyncResource, ServerStatus serverStatus) {
+     List<ResourceProvider> resourceProviders = new ArrayList<>();
+     boolean unsecureEnabled = tikaServerConfig.isEnableUnsecureFeatures();
+     List<String> endPoints = tikaServerConfig.getEndPoints();
+     if (endPoints.isEmpty()) {
+         resourceProviders.add(new SingletonResourceProvider(new MetadataResource()));
+         resourceProviders.add(new SingletonResourceProvider(new RecursiveMetadataResource()));
+         resourceProviders.add(new SingletonResourceProvider(new DetectorResource(serverStatus)));
+         resourceProviders.add(new SingletonResourceProvider(new LanguageResource()));
+         resourceProviders.add(new SingletonResourceProvider(new TranslateResource(serverStatus)));
+         resourceProviders.add(new SingletonResourceProvider(new TikaResource()));
+         resourceProviders.add(new SingletonResourceProvider(new UnpackerResource()));
+         resourceProviders.add(new SingletonResourceProvider(new TikaMimeTypes()));
+         resourceProviders.add(new SingletonResourceProvider(new TikaDetectors()));
+         resourceProviders.add(new SingletonResourceProvider(new TikaParsers()));
+         resourceProviders.add(new SingletonResourceProvider(new TikaVersion()));
+         //NOTE(review): the status end point is only loaded by default when
+         //unsecure features are enabled -- confirm that this is intended
+         if (unsecureEnabled) {
+             resourceProviders.add(new SingletonResourceProvider(new EmitterResource()));
+             resourceProviders.add(new SingletonResourceProvider(asyncResource));
+             resourceProviders.add(new SingletonResourceProvider(new TikaServerStatus(serverStatus)));
+         }
+         resourceProviders.addAll(loadResourceServices());
+         return resourceProviders;
+     }
+     for (String endPoint : endPoints) {
+         if ("meta".equals(endPoint)) {
+             resourceProviders.add(new SingletonResourceProvider(new MetadataResource()));
+         } else if ("rmeta".equals(endPoint)) {
+             resourceProviders.add(new SingletonResourceProvider(new RecursiveMetadataResource()));
+         } else if ("detect".equals(endPoint)) {
+             resourceProviders.add(new SingletonResourceProvider(new DetectorResource(serverStatus)));
+         } else if ("language".equals(endPoint)) {
+             resourceProviders.add(new SingletonResourceProvider(new LanguageResource()));
+         } else if ("translate".equals(endPoint)) {
+             resourceProviders.add(new SingletonResourceProvider(new TranslateResource(serverStatus)));
+         } else if ("tika".equals(endPoint)) {
+             resourceProviders.add(new SingletonResourceProvider(new TikaResource()));
+         } else if ("unpack".equals(endPoint)) {
+             resourceProviders.add(new SingletonResourceProvider(new UnpackerResource()));
+         } else if ("mime".equals(endPoint)) {
+             resourceProviders.add(new SingletonResourceProvider(new TikaMimeTypes()));
+         } else if ("detectors".equals(endPoint)) {
+             resourceProviders.add(new SingletonResourceProvider(new TikaDetectors()));
+         } else if ("parsers".equals(endPoint)) {
+             resourceProviders.add(new SingletonResourceProvider(new TikaParsers()));
+         } else if ("version".equals(endPoint)) {
+             resourceProviders.add(new SingletonResourceProvider(new TikaVersion()));
+         } else if ("emit".equals(endPoint)) {
+             //emitters are an unsecure feature; require the explicit opt-in
+             if (unsecureEnabled) {
+                 resourceProviders.add(new SingletonResourceProvider(new EmitterResource()));
+             } else {
+                 LOG.warn("Ignoring end point '{}': enableUnsecureFeatures is not set", endPoint);
+             }
+         } else if ("async".equals(endPoint)) {
+             //asyncResource is null unless unsecure features are enabled;
+             //never register a provider around a null resource
+             if (unsecureEnabled && asyncResource != null) {
+                 resourceProviders.add(new SingletonResourceProvider(asyncResource));
+             } else {
+                 LOG.warn("Ignoring end point '{}': enableUnsecureFeatures is not set", endPoint);
+             }
+         } else if ("status".equals(endPoint)) {
+             resourceProviders.add(new SingletonResourceProvider(new TikaServerStatus(serverStatus)));
+         } else {
+             LOG.warn("Ignoring unknown end point: {}", endPoint);
+         }
+     }
+     //service-loaded resources are added once, not once per configured end point
+     resourceProviders.addAll(loadResourceServices());
+     LOG.debug("loaded {}", resourceProviders);
+     return resourceProviders;
+ }
+
private static void logFetchersAndEmitters(boolean enableUnsecureFeatures, TikaConfig tika) {
if (enableUnsecureFeatures) {
StringBuilder sb = new StringBuilder();
@@ -431,41 +433,6 @@ public class TikaServerProcess {
System.exit(-1);
}
- private static ServerTimeoutConfig configureServerTimeouts(CommandLine line) {
- ServerTimeoutConfig serverTimeouts = new ServerTimeoutConfig();
- /*TODO -- add these in
- if (line.hasOption("forkedProcessStartupMillis")) {
- serverTimeouts.setForkedProcessStartupMillis(
- Long.parseLong(line.getOptionValue("forkedProcessStartupMillis")));
- }
- if (line.hasOption("forkedProcessShutdownMillis")) {
- serverTimeouts.setForkedProcessShutdownMillis(
- Long.parseLong(line.getOptionValue("forkedProcesShutdownMillis")));
- }*/
- if (line.hasOption("taskTimeoutMillis")) {
- serverTimeouts.setTaskTimeoutMillis(
- Long.parseLong(line.getOptionValue("taskTimeoutMillis")));
- }
- if (line.hasOption("pingTimeoutMillis")) {
- serverTimeouts.setPingTimeoutMillis(
- Long.parseLong(line.getOptionValue("pingTimeoutMillis")));
- }
- if (line.hasOption("pingPulseMillis")) {
- serverTimeouts.setPingPulseMillis(
- Long.parseLong(line.getOptionValue("pingPulseMillis")));
- }
-
- if (line.hasOption("maxRestarts")) {
- serverTimeouts.setMaxRestarts(Integer.parseInt(line.getOptionValue("maxRestarts")));
- }
-
- if (line.hasOption("maxForkedStartupMillis")) {
- serverTimeouts.setMaxForkedStartupMillis(
- Long.parseLong(line.getOptionValue("maxForkedStartupMillis")));
- }
-
- return serverTimeouts;
- }
private static class ServerDetails {
diff --git a/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/TikaServerWatchDog.java b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/TikaServerWatchDog.java
index 3670553..5cfbf05 100644
--- a/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/TikaServerWatchDog.java
+++ b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/TikaServerWatchDog.java
@@ -64,40 +64,29 @@ public class TikaServerWatchDog implements Callable<WatchDogResult> {
private volatile Instant lastPing = null;
private ForkedProcess forkedProcess = null;
- private final String[] args;
private final int port;
private final String id;
private final int restarts;
- private final ServerTimeoutConfig serverTimeoutConfig;
+ private final TikaServerConfig tikaServerConfig;
- TikaServerWatchDog(String[] args, int port, String id, int restarts,
- ServerTimeoutConfig serverTimeoutConfig) {
- this.args = addPortAndId(args, port, id);
+ TikaServerWatchDog(int port, String id, int restarts,
+ TikaServerConfig tikaServerConfig) {
this.port = port;
this.id = id;
this.restarts = restarts;
- this.serverTimeoutConfig = serverTimeoutConfig;
+ this.tikaServerConfig = tikaServerConfig;
}
- private static String[] addPortAndId(String[] args, int port, String id) {
- List<String> newArgs = new ArrayList<>();
- newArgs.addAll(Arrays.asList(args));
- newArgs.add("-p");
- newArgs.add(Integer.toString(port));
- newArgs.add("-i");
- newArgs.add(id);
- return newArgs.toArray(new String[0]);
- }
@Override
public WatchDogResult call() throws Exception {
LOG.info("server watch dog is starting up");
try {
- forkedProcess = new ForkedProcess(args, restarts, serverTimeoutConfig);
+ forkedProcess = new ForkedProcess(restarts);
setForkedStatus(FORKED_STATUS.RUNNING);
- startPingTimer(serverTimeoutConfig);
+ startPingTimer();
while (forkedProcess.ping()) {
- Thread.sleep(serverTimeoutConfig.getPingPulseMillis());
+ Thread.sleep(tikaServerConfig.getPingPulseMillis());
}
} catch (InterruptedException e) {
//interrupted...shutting down
@@ -112,7 +101,7 @@ public class TikaServerWatchDog implements Callable<WatchDogResult> {
return new WatchDogResult(port, id,restarts+1);
}
- private void startPingTimer(ServerTimeoutConfig serverTimeouts) {
+ private void startPingTimer() {
//if the forked thread is in stop-the-world mode, and isn't
//reading the ping, this thread checks to make sure
//that the parent ping is sent often enough.
@@ -131,7 +120,7 @@ public class TikaServerWatchDog implements Callable<WatchDogResult> {
}
if (tmpLastPing > 0) {
long elapsed = Duration.between(Instant.ofEpochMilli(tmpLastPing), Instant.now()).toMillis();
- if (elapsed > serverTimeouts.getPingTimeoutMillis()) {
+ if (elapsed > tikaServerConfig.getPingTimeoutMillis()) {
Process processToDestroy = null;
try {
processToDestroy = forkedProcess.process;
@@ -145,7 +134,7 @@ public class TikaServerWatchDog implements Callable<WatchDogResult> {
}
}
try {
- Thread.sleep(serverTimeouts.getPingPulseMillis());
+ Thread.sleep(tikaServerConfig.getPingPulseMillis());
} catch (InterruptedException e) {
//swallow
}
@@ -228,28 +217,21 @@ public class TikaServerWatchDog implements Callable<WatchDogResult> {
private final Process process;
private final DataOutputStream toForked;
- private final ServerTimeoutConfig serverTimeoutConfig;
private final Path forkedStatusFile;
private final ByteBuffer statusBuffer = ByteBuffer.allocate(16);
- private ForkedProcess(String[] args, int numRestarts, ServerTimeoutConfig serverTimeoutConfig) throws Exception {
- String prefix = DEFAULT_FORKED_STATUS_FILE_PREFIX;
- for (int i = 0; i < args.length; i++) {
- if (args[i].equals("-tmpFilePrefix")) {
- prefix = args[i+1];
- }
- }
+ private ForkedProcess(int numRestarts) throws Exception {
+ String prefix = tikaServerConfig.getTempFilePrefix();
this.forkedStatusFile = Files.createTempFile(prefix, "");
- this.serverTimeoutConfig = serverTimeoutConfig;
- this.process = startProcess(args, numRestarts, forkedStatusFile);
+ this.process = startProcess(numRestarts, forkedStatusFile);
//wait for file to be written/initialized by forked process
Instant start = Instant.now();
long elapsed = Duration.between(start, Instant.now()).toMillis();
try {
while (process.isAlive() && Files.size(forkedStatusFile) < 12
- && elapsed < serverTimeoutConfig.getMaxForkedStartupMillis()) {
+ && elapsed < tikaServerConfig.getMaxForkedStartupMillis()) {
Thread.sleep(50);
elapsed = Duration.between(start, Instant.now()).toMillis();
}
@@ -259,7 +241,7 @@ public class TikaServerWatchDog implements Callable<WatchDogResult> {
LOG.warn("failed to start forked process", e);
}
- if (elapsed > serverTimeoutConfig.getMaxForkedStartupMillis()) {
+ if (elapsed > tikaServerConfig.getMaxForkedStartupMillis()) {
close();
throw new RuntimeException("Forked process failed to start after "+elapsed + " (ms)");
}
@@ -308,7 +290,7 @@ public class TikaServerWatchDog implements Callable<WatchDogResult> {
forkedStatus.status);
if (elapsedSinceLastUpdate >
- serverTimeoutConfig.getPingTimeoutMillis()) {
+ tikaServerConfig.getPingTimeoutMillis()) {
//forked hasn't written a status update in a longer time than allowed
LOG.warn("Forked's last update exceeded ping timeout: {} (ms) with status {}",
elapsedSinceLastUpdate, forkedStatus.status);
@@ -325,7 +307,7 @@ public class TikaServerWatchDog implements Callable<WatchDogResult> {
//only reading, but need to include write to allow for locking
try (FileChannel fc = FileChannel.open(forkedStatusFile, READ, WRITE)) {
- while (elapsed < serverTimeoutConfig.getPingTimeoutMillis()) {
+ while (elapsed < tikaServerConfig.getPingTimeoutMillis()) {
try (FileLock lock = fc.tryLock(0, 16, true)) {
if (lock != null) {
((Buffer)statusBuffer).position(0);
@@ -378,15 +360,15 @@ public class TikaServerWatchDog implements Callable<WatchDogResult> {
}
- private Process startProcess(String[] args, int numRestarts, Path forkedStatusFile) throws IOException {
+ private Process startProcess(int numRestarts, Path forkedStatusFile) throws IOException {
ProcessBuilder builder = new ProcessBuilder();
builder.redirectError(ProcessBuilder.Redirect.INHERIT);
List<String> argList = new ArrayList<>();
- String javaPath = extractJavaPath(args);
- List<String> jvmArgs = extractJVMArgs(args);
- List<String> forkedArgs = extractArgs(args);
+ String javaPath = tikaServerConfig.getJavaPath();
+ List<String> jvmArgs = tikaServerConfig.getForkedJvmArgs();
+ List<String> forkedArgs = tikaServerConfig.getForkedProcessArgs(port, id);
forkedArgs.add("-forkedStatusFile");
forkedArgs.add(ProcessUtils.escapeCommandLine(forkedStatusFile.toAbsolutePath().toString()));
diff --git a/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/writer/MetadataListMessageBodyWriter.java b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/writer/MetadataListMessageBodyWriter.java
index 647da2d..434f8b3 100644
--- a/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/writer/MetadataListMessageBodyWriter.java
+++ b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/writer/MetadataListMessageBodyWriter.java
@@ -58,7 +58,6 @@ public class MetadataListMessageBodyWriter implements MessageBodyWriter<Metadata
WebApplicationException {
Writer writer = new OutputStreamWriter(entityStream, UTF_8);
JsonMetadataList.toJson(list.getMetadata(), writer);
- writer.flush();
entityStream.flush();
}
}
diff --git a/tika-server/tika-server-core/src/main/resources/tika-server-config-default.xml b/tika-server/tika-server-core/src/main/resources/tika-server-config-default.xml
new file mode 100644
index 0000000..95d8550
--- /dev/null
+++ b/tika-server/tika-server-core/src/main/resources/tika-server-config-default.xml
@@ -0,0 +1,97 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<properties>
+ <server>
+ <!-- which port to start the server on. If you specify a range,
+ e.g. 9995-9998, TikaServerCli will start four forked servers,
+ one at each port. You can also specify multiple forked servers
+ via a comma-delimited value: 9995,9997.
+
+ -->
+ <port>9998</port>
+ <host>localhost</host>
+ <!-- if specified, this will be the id that is used in the
+ /status endpoint and elsewhere. If an id is specified
+ and more than one forked process is invoked, each process
+ will have an id followed by the port, e.g. my_id-9998. If a
+ forked server has to restart, it will maintain its original id.
+ If not specified, a UUID will be generated.
+ -->
+ <id></id>
+ <!-- whether or not to allow CORS requests. Set to 'all' if you
+ want to allow all CORS requests. Set to NONE or leave blank
+ if you do not want to enable CORS. -->
+ <cors>NONE</cors>
+ <!-- which digests to calculate, comma delimited (e.g. md5,sha256);
+ optionally specify encoding followed by a colon (e.g. "sha1:32").
+ Can be empty if you don't want to calculate a digest -->
+ <digest>sha256</digest>
+ <!-- how much to read to memory during the digest phase before
+ spooling to disk; only applies if a digest is selected -->
+ <digestMarkLimit>1000000</digestMarkLimit>
+ <!-- request URI log level: 'debug' or 'info' -->
+ <log>info</log>
+ <!-- whether or not to include the stacktrace when a parse exception happens
+ in the data returned to the user -->
+ <includeStack>false</includeStack>
+ <!-- whether or not to enable the status endpoint -->
+ <status>false</status>
+ <!-- If set to 'true', this runs tika server "in process"
+ in the legacy 1.x mode.
+ This means that the server will be susceptible to infinite loops
+ and crashes.
+ If set to 'false', the server will spawn a forked
+ process and restart the forked process on catastrophic failures
+ (this was called -spawnChild mode in 1.x).
+ nofork=false is the default in 2.x
+ -->
+ <nofork>false</nofork>
+ <!-- maximum time to allow per parse before shutting down and restarting
+ the forked parser. Not allowed if nofork=true. -->
+ <taskTimeoutMillis>300000</taskTimeoutMillis>
+ <!-- how often to check whether a parse has timed out.
+ Not allowed if nofork=true. -->
+ <taskPulseMillis>10000</taskPulseMillis>
+ <!-- maximum time to allow for a response from the forked process
+ before shutting it down and restarting it.
+ Not allowed if nofork=true. -->
+ <pingTimeoutMillis>60000</pingTimeoutMillis>
+ <!-- how often to check whether the forked process needs to be restarted.
+ Not allowed if nofork=true. -->
+ <pingPulseMillis>10000</pingPulseMillis>
+ <!-- maximum amount of time to wait for a forked process to
+ start up.
+ Not allowed if nofork=true. -->
+ <maxForkedStartupMillis>120000</maxForkedStartupMillis>
+ <!-- maximum number of times to allow a specific forked process
+ to be restarted.
+ Not allowed if nofork=true. -->
+ <maxRestarts>-1</maxRestarts>
+ <!-- maximum files to parse per forked process before
+ restarting the forked process to clear potential
+ memory leaks.
+ Not allowed if nofork=true. -->
+ <maxFiles>100000</maxFiles>
+ <!-- if you want to specify a specific javaHome for
+ the forked process.
+ Not allowed if nofork=true. -->
+ <javaHome></javaHome>
+ <!-- this is for debugging only -->
+ <tmpFilePrefix></tmpFilePrefix>
+ </server>
+</properties>
diff --git a/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/CXFTestBase.java b/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/CXFTestBase.java
index 47ada93..23a5871 100644
--- a/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/CXFTestBase.java
+++ b/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/CXFTestBase.java
@@ -61,7 +61,7 @@ public abstract class CXFTestBase {
private final static int DIGESTER_READ_LIMIT = 20*1024*1024;
protected static final String endPoint =
- "http://localhost:" + TikaServerCli.DEFAULT_PORT;
+ "http://localhost:" + TikaServerConfig.DEFAULT_PORT;
protected Server server;
private TikaConfig tika;
diff --git a/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/TikaServerConfigTest.java b/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/TikaServerConfigTest.java
new file mode 100644
index 0000000..ac9b6c3
--- /dev/null
+++ b/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/TikaServerConfigTest.java
@@ -0,0 +1,20 @@
+package org.apache.tika.server.core;
+
+import org.apache.tika.config.TikaConfigTest;
+import org.junit.Test;
+
+import java.io.IOException;
+
+import static org.junit.Assert.assertEquals;
+
+public class TikaServerConfigTest {
+
+ @Test
+ public void testBasic() throws Exception {
+ TikaServerConfig config = TikaServerConfig.load(
+ TikaConfigTest.class.getResourceAsStream("/configs/tika-config-server.xml"));
+ assertEquals(-1, config.getMaxRestarts());
+ assertEquals(54321, config.getTaskTimeoutMillis());
+ assertEquals(true, config.isEnableUnsecureFeatures());
+ }
+}
diff --git a/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/TikaServerIntegrationTest.java b/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/TikaServerIntegrationTest.java
index 9e2d7d5..b9d58d7 100644
--- a/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/TikaServerIntegrationTest.java
+++ b/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/TikaServerIntegrationTest.java
@@ -19,8 +19,10 @@ package org.apache.tika.server.core;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.cxf.jaxrs.client.WebClient;
+import org.apache.tika.TikaTest;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.serialization.JsonMetadataList;
+import org.apache.tika.utils.ProcessUtils;
import org.junit.Ignore;
import org.junit.Test;
import org.slf4j.Logger;
@@ -33,9 +35,11 @@ import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.net.ConnectException;
+import java.net.URISyntaxException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
+import java.nio.file.Paths;
import java.nio.file.StandardCopyOption;
import java.security.Permission;
import java.time.Duration;
@@ -47,6 +51,7 @@ import java.util.concurrent.atomic.AtomicInteger;
import static java.nio.charset.StandardCharsets.UTF_8;
import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.fail;
public class TikaServerIntegrationTest extends IntegrationTestBase {
@@ -61,9 +66,7 @@ public class TikaServerIntegrationTest extends IntegrationTestBase {
public void run() {
TikaServerCli.main(
new String[]{
- "-maxFiles", "100",
- "-p", INTEGRATION_TEST_PORT,
- "-tmpFilePrefix", "basic-"
+ "-c", getConfig("tika-config-server-basic.xml")
});
}
};
@@ -127,7 +130,6 @@ public class TikaServerIntegrationTest extends IntegrationTestBase {
"-pingPulseMillis", "100",
"-status",
"-tmpFilePrefix", "tika-server-oom"
-
});
}
};
@@ -333,10 +335,7 @@ public class TikaServerIntegrationTest extends IntegrationTestBase {
public void run() {
TikaServerCli.main(
new String[]{
- "-JXms20m", "-JXmx10m",
- "-p", INTEGRATION_TEST_PORT,
- "-tmpFilePrefix", "tika-server-badargs"
-
+ "-c", getConfig("tika-config-server-badjvmargs.xml"),
});
}
};
@@ -352,6 +351,15 @@ public class TikaServerIntegrationTest extends IntegrationTestBase {
assertEquals(-1, i.get());
}
+ private String getConfig(String configName) {
+ try {
+ return ProcessUtils.escapeCommandLine(Paths.get(TikaServerIntegrationTest.class.
+ getResource("/configs/"+configName).toURI()).toAbsolutePath().toString());
+ } catch (URISyntaxException e) {
+ throw new RuntimeException(e);
+ }
+ }
+
@Test
public void testStdErrOutBasic() throws Exception {
final AtomicInteger i = new AtomicInteger();
@@ -466,7 +474,7 @@ public class TikaServerIntegrationTest extends IntegrationTestBase {
private void awaitServerStartup() throws Exception {
Instant started = Instant.now();
long elapsed = Duration.between(started, Instant.now()).toMillis();
- WebClient client = WebClient.create(endPoint + "/tika").accept("text/plain");
+ WebClient client = WebClient.create(endPoint + "/").accept("text/html");
while (elapsed < MAX_WAIT_MS) {
try {
Response response = client.get();
@@ -576,6 +584,8 @@ public class TikaServerIntegrationTest extends IntegrationTestBase {
assertEquals(1, metadataList.size());
assertEquals("Nikolai Lobachevsky", metadataList.get(0).get("author"));
assertContains("hello world", metadataList.get(0).get("X-TIKA:content"));
+ return;
}
+ fail("should have completed within 3 tries");
}
}
diff --git a/tika-server/tika-server-core/src/test/resources/configs/tika-config-server-badjvmargs.xml b/tika-server/tika-server-core/src/test/resources/configs/tika-config-server-badjvmargs.xml
new file mode 100644
index 0000000..603ac8d
--- /dev/null
+++ b/tika-server/tika-server-core/src/test/resources/configs/tika-config-server-badjvmargs.xml
@@ -0,0 +1,31 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<properties>
+ <server>
+ <port>9999</port>
+ <taskTimeoutMillis>54321</taskTimeoutMillis>
+ <enableUnsecureFeatures>true</enableUnsecureFeatures>
+ <forkedJVMArgs>
+ <arg>-Xms20m</arg>
+ <arg>-Xmx10m</arg>
+ </forkedJVMArgs>
+ <endpoints>
+ <endpoint>rmeta</endpoint>
+ </endpoints>
+ </server>
+</properties>
diff --git a/tika-server/tika-server-core/src/test/resources/configs/tika-config-server-basic.xml b/tika-server/tika-server-core/src/test/resources/configs/tika-config-server-basic.xml
new file mode 100644
index 0000000..42a2c58
--- /dev/null
+++ b/tika-server/tika-server-core/src/test/resources/configs/tika-config-server-basic.xml
@@ -0,0 +1,30 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<properties>
+ <server>
+ <taskTimeoutMillis>120000</taskTimeoutMillis>
+ <port>9999</port>
+ <maxFiles>1000</maxFiles>
+ <forkedJVMArgs>
+ <arg>-Xmx512m</arg>
+ </forkedJVMArgs>
+ <endpoints>
+ <endpoint>rmeta</endpoint>
+ </endpoints>
+ </server>
+</properties>
diff --git a/tika-server/tika-server-core/src/test/resources/configs/tika-config-server-timeout-10000.xml b/tika-server/tika-server-core/src/test/resources/configs/tika-config-server-timeout-10000.xml
new file mode 100644
index 0000000..78ec922
--- /dev/null
+++ b/tika-server/tika-server-core/src/test/resources/configs/tika-config-server-timeout-10000.xml
@@ -0,0 +1,26 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<properties>
+ <server>
+ <taskTimeoutMillis>120000</taskTimeoutMillis>
+ <maxFiles>1000</maxFiles>
+ <forkedJVMArgs>
+ <arg>-Xmx512m</arg>
+ </forkedJVMArgs>
+ </server>
+</properties>
diff --git a/tika-server/tika-server-core/src/test/resources/configs/tika-config-server.xml b/tika-server/tika-server-core/src/test/resources/configs/tika-config-server.xml
new file mode 100644
index 0000000..50a751e
--- /dev/null
+++ b/tika-server/tika-server-core/src/test/resources/configs/tika-config-server.xml
@@ -0,0 +1,29 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<properties>
+ <server>
+ <taskTimeoutMillis>54321</taskTimeoutMillis>
+ <enableUnsecureFeatures>true</enableUnsecureFeatures>
+ <forkedJVMArgs>
+ <arg>-Xmx2g</arg>
+ </forkedJVMArgs>
+ <endpoints>
+ <endpoint>rmeta</endpoint>
+ </endpoints>
+ </server>
+</properties>