You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2020/02/04 23:08:40 UTC
[tika] branch master updated: TIKA-3010 Install and run Tika-Server
as a Service (#305)
This is an automated email from the ASF dual-hosted git repository.
tallison pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/tika.git
The following commit(s) were added to refs/heads/master by this push:
new cba7149 TIKA-3010 Install and run Tika-Server as a Service (#305)
cba7149 is described below
commit cba7149243b5d9d2df6b2bf6cbac21adef6aadec
Author: Eric Pugh <ep...@opensourceconnections.com>
AuthorDate: Tue Feb 4 18:07:55 2020 -0500
TIKA-3010 Install and run Tika-Server as a Service (#305)
* wip, got start command working
* starting and writing pid out
* able to stop tika-servers
* stop commands working
* rename to just tika
* stub out status
* package up tika-server as a package with the bin directory and the tika-server jar file
* install script is installing into /opt/tika, errors on /opt/tika//logs
* service tika start and service tika stop work
* remove solr specific feature
* this is a full path
* no support for restart, only start and stop
* fix name of binary install
* swap to log4j.properties path
* simplify logging configuration
* introduce a spawnChild option
* refer to Tika
* add default option
* document how to run install script
---
tika-server/README.md | 15 +-
tika-server/assembly.xml | 30 ++
tika-server/bin/init.d/tika | 78 +++++
tika-server/bin/install_tika_service.sh | 367 ++++++++++++++++++++
tika-server/bin/tika | 571 ++++++++++++++++++++++++++++++++
tika-server/bin/tika.in.sh | 79 +++++
tika-server/pom.xml | 18 +
7 files changed, 1157 insertions(+), 1 deletion(-)
diff --git a/tika-server/README.md b/tika-server/README.md
index 2d36c53..008250a 100644
--- a/tika-server/README.md
+++ b/tika-server/README.md
@@ -32,6 +32,20 @@ This will load Apache Tika Server and expose its interface on:
`http://localhost:9998`
+Installing as a Service on Linux
+-----------------------
+To run as a service on Linux you need to run the `install_tika_service.sh` script.
+
+Assuming you have the binary distribution like `tika-server-2.0.0-SNAPSHOT-bin.tgz`,
+then you can extract the install script via:
+
+`tar xzf tika-server-2.0.0-SNAPSHOT-bin.tgz tika-server-2.0.0-SNAPSHOT-bin/bin/install_tika_service.sh --strip-components=2`
+
+and then run the installation process via:
+
+`./install_tika_service.sh ./tika-server-2.0.0-SNAPSHOT-bin.tgz`
+
+
Usage
-----
Usage examples from command line with `curl` utility:
@@ -59,4 +73,3 @@ HTTP Return Codes
`415` - Unknown file type
`422` - Unparsable document of known type (password protected documents and unsupported versions like Biff5 Excel)
`500` - Internal error
-
diff --git a/tika-server/assembly.xml b/tika-server/assembly.xml
new file mode 100644
index 0000000..412a9a2
--- /dev/null
+++ b/tika-server/assembly.xml
@@ -0,0 +1,30 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  Assembly descriptor (id "bin") for the tika-server binary distribution.
+  Produces .tgz and .zip archives rooted at ${project.build.finalName}-bin that
+  contain the README, the bin/ service scripts and the server jar renamed to
+  tika-server.jar.
+-->
+<assembly xmlns="http://maven.apache.org/ASSEMBLY/2.0.0"
+          xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+          xsi:schemaLocation="http://maven.apache.org/ASSEMBLY/2.0.0 http://maven.apache.org/xsd/assembly-2.0.0.xsd">
+  <id>bin</id>
+  <baseDirectory>${project.build.finalName}-bin</baseDirectory>
+  <formats>
+    <format>tgz</format>
+    <format>zip</format>
+  </formats>
+  <fileSets>
+    <fileSet>
+      <directory>${project.basedir}</directory>
+      <outputDirectory/>
+      <includes>
+        <include>README*</include>
+      </includes>
+    </fileSet>
+    <fileSet>
+      <directory>${project.basedir}/bin</directory>
+      <outputDirectory>bin</outputDirectory>
+      <!-- The scripts are run directly after extraction, so package them executable. -->
+      <fileMode>0755</fileMode>
+    </fileSet>
+  </fileSets>
+  <files>
+    <file>
+      <source>${project.build.directory}/tika-server-${project.version}.jar</source>
+      <outputDirectory/>
+      <destName>tika-server.jar</destName>
+    </file>
+  </files>
+</assembly>
diff --git a/tika-server/bin/init.d/tika b/tika-server/bin/init.d/tika
new file mode 100755
index 0000000..15bb29e
--- /dev/null
+++ b/tika-server/bin/init.d/tika
@@ -0,0 +1,78 @@
+#!/bin/sh
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+### BEGIN INIT INFO
+# Provides: tika
+# Required-Start: $remote_fs $syslog
+# Required-Stop: $remote_fs $syslog
+# Default-Start: 2 3 4 5
+# Default-Stop: 0 1 6
+# Description: Controls Apache Tika as a Service
+### END INIT INFO
+
+# Example of a very simple *nix init script that delegates commands to the bin/tika script
+# Typical usage is to do:
+#
+# cp bin/init.d/tika /etc/init.d/tika
+# chmod 755 /etc/init.d/tika
+# chown root:root /etc/init.d/tika
+# update-rc.d tika defaults
+# update-rc.d tika enable
+
+# Where you extracted the Tika distribution bundle
+TIKA_INSTALL_DIR="/opt/tika"
+
+if [ ! -d "$TIKA_INSTALL_DIR" ]; then
+ echo "$TIKA_INSTALL_DIR not found! Please check the TIKA_INSTALL_DIR setting in your $0 script."
+ exit 1
+fi
+
+# Path to an include file that defines environment specific settings to override default
+# variables used by the bin/tika script. It's highly recommended to define this script so
+# that you can keep the Tika binary files separated from live files (pid, logs, etc)
+# see bin/tika.in.sh for an example
+TIKA_ENV="/etc/default/tika.in.sh"
+
+if [ ! -f "$TIKA_ENV" ]; then
+ echo "$TIKA_ENV not found! Please check the TIKA_ENV setting in your $0 script."
+ exit 1
+fi
+
+# Specify the user to run Tika as; if not set, then Tika will run as root.
+# Running Tika as root is not recommended for production environments
+RUNAS="tika"
+
+# verify the specified run as user exists
+runas_uid="`id -u "$RUNAS"`"
+if [ $? -ne 0 ]; then
+ echo "User $RUNAS not found! Please create the $RUNAS user before running this script."
+ exit 1
+fi
+
+# Validate the requested action; only start, stop and status are delegated to bin/tika.
+case "$1" in
+  start|stop|status)
+    TIKA_CMD="$1"
+    ;;
+  *)
+    # Invalid or missing action: report on stderr and fail explicitly. A bare
+    # `exit` here returned the status of the preceding echo, i.e. success.
+    echo "Usage: $0 {start|stop|status}" >&2
+    exit 1
+    ;;
+esac
+
+if [ -n "$RUNAS" ]; then
+ su -c "TIKA_INCLUDE=\"$TIKA_ENV\" \"$TIKA_INSTALL_DIR/bin/tika\" $TIKA_CMD" - "$RUNAS"
+else
+ TIKA_INCLUDE="$TIKA_ENV" "$TIKA_INSTALL_DIR/bin/tika" "$TIKA_CMD"
+fi
diff --git a/tika-server/bin/install_tika_service.sh b/tika-server/bin/install_tika_service.sh
new file mode 100755
index 0000000..b414d32
--- /dev/null
+++ b/tika-server/bin/install_tika_service.sh
@@ -0,0 +1,367 @@
+#!/usr/bin/env bash
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+if [[ $EUID -ne 0 ]]; then
+ echo -e "\nERROR: This script must be run as root\n" 1>&2
+ exit 1
+fi
+
+# Prints the installer usage text to stdout.
+#   $1 - optional error message; when non-empty it is printed first, on stderr,
+#        prefixed with "ERROR:". The function itself never exits.
+print_usage() {
+  ERROR_MSG="$1"
+
+  if [ "$ERROR_MSG" != "" ]; then
+    echo -e "\nERROR: $ERROR_MSG\n" 1>&2
+  fi
+
+  echo ""
+  echo "Usage: install_tika_service.sh <path_to_tika_distribution_archive> [OPTIONS]"
+  echo ""
+  echo " The first argument to the script must be a path to a Tika distribution archive, such as tika-server-2.0.0-SNAPSHOT-bin.tgz"
+  echo " (only .tgz or .zip are supported formats for the archive)"
+  echo ""
+  echo " Supported OPTIONS include:"
+  echo ""
+  echo " -d Directory for live / writable Tika files, such as logs, pid files; defaults to /var/<service name>, e.g. /var/tika"
+  echo ""
+  echo " -i Directory to extract the Tika installation archive; defaults to /opt/"
+  echo " The specified path must exist prior to using this script."
+  echo ""
+  echo " -p Port Tika should bind to; default is 9998"
+  echo ""
+  echo " -s Service name; defaults to tika"
+  echo ""
+  echo " -u User to own the Tika files and run the Tika process as; defaults to tika"
+  echo " This script will create the specified user account if it does not exist."
+  echo ""
+  echo " -f Upgrade Tika. Overwrite symlink and init script of previous installation."
+  echo ""
+  echo " -n Do not start Tika service after install, and do not abort on missing Java"
+  echo ""
+  echo " NOTE: Must be run as the root user"
+  echo ""
+} # end print_usage
+
+print_error() {
+ echo $1
+ exit 1
+}
+
+# Locate *NIX distribution by looking for match from various detection strategies
+# We start with /etc/os-release, as this will also work for Docker containers
+for command in "grep -E \"^NAME=\" /etc/os-release" \
+ "lsb_release -i" \
+ "cat /proc/version" \
+ "uname -a" ; do
+ distro_string=$(eval $command 2>/dev/null)
+ unset distro
+ if [[ ${distro_string,,} == *"debian"* ]]; then
+ distro=Debian
+ elif [[ ${distro_string,,} == *"red hat"* ]]; then
+ distro=RedHat
+ elif [[ ${distro_string,,} == *"centos"* ]]; then
+ distro=CentOS
+ elif [[ ${distro_string,,} == *"ubuntu"* ]]; then
+ distro=Ubuntu
+ elif [[ ${distro_string,,} == *"suse"* ]]; then
+ distro=SUSE
+ elif [[ ${distro_string,,} == *"darwin"* ]]; then
+ echo "Sorry, this script does not support macOS. You'll need to setup Tika as a service manually using the documentation provided in the Tika Reference Guide."
+ echo "You could also try installing via Homebrew (http://brew.sh/), e.g. brew install tika"
+ exit 1
+ fi
+ if [[ $distro ]] ; then break ; fi
+done
+if [[ ! $distro ]] ; then
+ echo -e "\nERROR: Unable to auto-detect your *NIX distribution!\nYou'll need to setup Tika as a service manually using the documentation provided in the Tika Reference Guide.\n" 1>&2
+ exit 1
+fi
+
+if [ -z "$1" ]; then
+ print_usage "Must specify the path to the Tika installation archive, such as tika-server-2.0.0-SNAPSHOT-bin.tgz"
+ exit 1
+fi
+
+TIKA_ARCHIVE=$1
+if [ ! -f "$TIKA_ARCHIVE" ]; then
+ print_usage "Specified Tika installation archive $TIKA_ARCHIVE not found!"
+ exit 1
+fi
+
+# strip off path info
+TIKA_INSTALL_FILE=${TIKA_ARCHIVE##*/}
+is_tar=true
+if [ ${TIKA_INSTALL_FILE: -4} == ".tgz" ]; then
+ TIKA_DIR=${TIKA_INSTALL_FILE%.tgz}
+elif [ ${TIKA_INSTALL_FILE: -4} == ".zip" ]; then
+ TIKA_DIR=${TIKA_INSTALL_FILE%.zip}
+ is_tar=false
+else
+ print_usage "Tika installation archive $TIKA_ARCHIVE is invalid, expected a .tgz or .zip file!"
+ exit 1
+fi
+
+TIKA_START=true
+if [ $# -gt 1 ]; then
+ shift
+ while true; do
+ case $1 in
+ -i)
+ if [[ -z "$2" || "${2:0:1}" == "-" ]]; then
+ print_usage "Directory path is required when using the $1 option!"
+ exit 1
+ fi
+ TIKA_EXTRACT_DIR=$2
+ shift 2
+ ;;
+ -d)
+ if [[ -z "$2" || "${2:0:1}" == "-" ]]; then
+ print_usage "Directory path is required when using the $1 option!"
+ exit 1
+ fi
+ TIKA_VAR_DIR="$2"
+ shift 2
+ ;;
+ -u)
+ if [[ -z "$2" || "${2:0:1}" == "-" ]]; then
+ print_usage "Username is required when using the $1 option!"
+ exit 1
+ fi
+ TIKA_USER="$2"
+ shift 2
+ ;;
+ -s)
+ if [[ -z "$2" || "${2:0:1}" == "-" ]]; then
+ print_usage "Service name is required when using the $1 option!"
+ exit 1
+ fi
+ TIKA_SERVICE="$2"
+ shift 2
+ ;;
+ -p)
+ if [[ -z "$2" || "${2:0:1}" == "-" ]]; then
+ print_usage "Port is required when using the $1 option!"
+ exit 1
+ fi
+ TIKA_PORT="$2"
+ shift 2
+ ;;
+ -f)
+ TIKA_UPGRADE="YES"
+ shift 1
+ ;;
+ -n)
+ TIKA_START=false
+ shift 1
+ ;;
+ -help|-usage)
+ print_usage ""
+ exit 0
+ ;;
+ --)
+ shift
+ break
+ ;;
+ *)
+ if [ "$1" != "" ]; then
+ print_usage "Unrecognized or misplaced argument: $1!"
+ exit 1
+ else
+ break # out-of-args, stop looping
+ fi
+ ;;
+ esac
+ done
+fi
+
+# Test for availability of needed tools
+# is_tar holds the string "true" or "false"; a bare [[ $is_tar ]] only tests for
+# a non-empty string and was therefore always true, so unzip was never checked
+# for .zip archives. Compare the value explicitly.
+if [[ "$is_tar" == "true" ]] ; then
+  tar --version &>/dev/null || print_error "Script requires the 'tar' command"
+else
+  unzip -hh &>/dev/null || print_error "Script requires the 'unzip' command"
+fi
+# service and java are only needed when the service is started after install (no -n)
+if [[ $TIKA_START == "true" ]] ; then
+  service --version &>/dev/null || service --help &>/dev/null || print_error "Script requires the 'service' command"
+  java -version &>/dev/null || print_error "Tika requires java, please install or set JAVA_HOME properly"
+fi
+# lsof is optional: only a recommendation is printed when it is missing
+lsof -h &>/dev/null || echo "We recommend installing the 'lsof' command for more stable start/stop of Tika"
+
+
+if [ -z "$TIKA_EXTRACT_DIR" ]; then
+ TIKA_EXTRACT_DIR=/opt
+fi
+
+if [ ! -d "$TIKA_EXTRACT_DIR" ]; then
+ print_usage "Installation directory $TIKA_EXTRACT_DIR not found! Please create it before running this script."
+ exit 1
+fi
+
+if [ -z "$TIKA_SERVICE" ]; then
+ TIKA_SERVICE=tika
+fi
+
+if [ -z "$TIKA_VAR_DIR" ]; then
+ TIKA_VAR_DIR="/var/$TIKA_SERVICE"
+fi
+
+if [ -z "$TIKA_USER" ]; then
+ TIKA_USER=tika
+fi
+
+if [ -z "$TIKA_PORT" ]; then
+ TIKA_PORT=9998
+fi
+
+if [ -z "$TIKA_SPAWN_CHILD_OPTS" ]; then
+ TIKA_SPAWN_CHILD_OPTS="-spawnChild"
+fi
+
+if [ -z "$TIKA_UPGRADE" ]; then
+ TIKA_UPGRADE=NO
+fi
+
+if [ ! "$TIKA_UPGRADE" = "YES" ]; then
+ if [ -f "/etc/init.d/$TIKA_SERVICE" ]; then
+ print_usage "/etc/init.d/$TIKA_SERVICE already exists! Perhaps Tika is already setup as a service on this host? To upgrade Tika use the -f option."
+ exit 1
+ fi
+
+ if [ -e "$TIKA_EXTRACT_DIR/$TIKA_SERVICE" ]; then
+ print_usage "$TIKA_EXTRACT_DIR/$TIKA_SERVICE already exists! Please move this directory / link or choose a different service name using the -s option."
+ exit 1
+ fi
+fi
+
+# stop running instance
+if [ -f "/etc/init.d/$TIKA_SERVICE" ]; then
+ echo -e "\nStopping Tika instance if exists ...\n"
+ service "$TIKA_SERVICE" stop
+fi
+
+# create user if not exists
+tika_uid="`id -u "$TIKA_USER"`"
+if [ $? -ne 0 ]; then
+ echo "Creating new user: $TIKA_USER"
+ if [ "$distro" == "RedHat" ] || [ "$distro" == "CentOS" ] ; then
+ adduser --system -U -m --home-dir "$TIKA_VAR_DIR" "$TIKA_USER"
+ elif [ "$distro" == "SUSE" ]; then
+ useradd --system -U -m --home-dir "$TIKA_VAR_DIR" "$TIKA_USER"
+ else
+ adduser --system --shell /bin/bash --group --disabled-password --home "$TIKA_VAR_DIR" "$TIKA_USER"
+ fi
+fi
+
+# extract
+echo "Tika extract dir: $TIKA_EXTRACT_DIR"
+echo "TIKA_DIR: $TIKA_DIR"
+
+TIKA_INSTALL_DIR="$TIKA_EXTRACT_DIR/$TIKA_DIR"
+
+echo "tika install dir: $TIKA_INSTALL_DIR "
+if [ ! -d "$TIKA_INSTALL_DIR" ]; then
+
+ echo -e "\nExtracting $TIKA_ARCHIVE to $TIKA_EXTRACT_DIR\n"
+
+ if $is_tar ; then
+ tar zxf "$TIKA_ARCHIVE" -C "$TIKA_EXTRACT_DIR"
+ else
+ unzip -q "$TIKA_ARCHIVE" -d "$TIKA_EXTRACT_DIR"
+ fi
+
+ if [ ! -d "$TIKA_INSTALL_DIR" ]; then
+ echo -e "\nERROR: Expected directory $TIKA_INSTALL_DIR not found after extracting $TIKA_ARCHIVE ... script fails.\n" 1>&2
+ exit 1
+ fi
+
+ chown -R root: "$TIKA_INSTALL_DIR"
+ find "$TIKA_INSTALL_DIR" -type d -print0 | xargs -0 chmod 0755
+ find "$TIKA_INSTALL_DIR" -type f -print0 | xargs -0 chmod 0644
+ chmod -R 0755 "$TIKA_INSTALL_DIR/bin"
+else
+ echo -e "\nWARNING: $TIKA_INSTALL_DIR already exists! Skipping extract ...\n"
+fi
+
+# create a symlink for easier scripting
+if [ -h "$TIKA_EXTRACT_DIR/$TIKA_SERVICE" ]; then
+ echo -e "\nRemoving old symlink $TIKA_EXTRACT_DIR/$TIKA_SERVICE ...\n"
+ rm "$TIKA_EXTRACT_DIR/$TIKA_SERVICE"
+fi
+if [ -e "$TIKA_EXTRACT_DIR/$TIKA_SERVICE" ]; then
+ echo -e "\nWARNING: $TIKA_EXTRACT_DIR/$TIKA_SERVICE is not symlink! Skipping symlink update ...\n"
+else
+ echo -e "\nInstalling symlink $TIKA_EXTRACT_DIR/$TIKA_SERVICE -> $TIKA_INSTALL_DIR ...\n"
+ ln -s "$TIKA_INSTALL_DIR" "$TIKA_EXTRACT_DIR/$TIKA_SERVICE"
+fi
+
+# install init.d script
+echo -e "\nInstalling /etc/init.d/$TIKA_SERVICE script ...\n"
+cp "$TIKA_INSTALL_DIR/bin/init.d/tika" "/etc/init.d/$TIKA_SERVICE"
+chmod 0744 "/etc/init.d/$TIKA_SERVICE"
+chown root: "/etc/init.d/$TIKA_SERVICE"
+# do some basic variable substitution on the init.d script
+sed_expr1="s#TIKA_INSTALL_DIR=.*#TIKA_INSTALL_DIR=\"$TIKA_EXTRACT_DIR/$TIKA_SERVICE\"#"
+sed_expr2="s#TIKA_ENV=.*#TIKA_ENV=\"/etc/default/$TIKA_SERVICE.in.sh\"#"
+sed_expr3="s#RUNAS=.*#RUNAS=\"$TIKA_USER\"#"
+sed_expr4="s#Provides:.*#Provides: $TIKA_SERVICE#"
+sed -i -e "$sed_expr1" -e "$sed_expr2" -e "$sed_expr3" -e "$sed_expr4" "/etc/init.d/$TIKA_SERVICE"
+
+# install/move configuration
+if [ ! -d /etc/default ]; then
+ mkdir /etc/default
+ chown root: /etc/default
+ chmod 0755 /etc/default
+fi
+if [ -f "$TIKA_VAR_DIR/tika.in.sh" ]; then
+ echo -e "\nMoving existing $TIKA_VAR_DIR/tika.in.sh to /etc/default/$TIKA_SERVICE.in.sh ...\n"
+ mv "$TIKA_VAR_DIR/tika.in.sh" "/etc/default/$TIKA_SERVICE.in.sh"
+elif [ -f "/etc/default/$TIKA_SERVICE.in.sh" ]; then
+ echo -e "\n/etc/default/$TIKA_SERVICE.in.sh already exist. Skipping install ...\n"
+else
+ echo -e "\nInstalling /etc/default/$TIKA_SERVICE.in.sh ...\n"
+ cp "$TIKA_INSTALL_DIR/bin/tika.in.sh" "/etc/default/$TIKA_SERVICE.in.sh"
+ mv "$TIKA_INSTALL_DIR/bin/tika.in.sh" "$TIKA_INSTALL_DIR/bin/tika.in.sh.orig"
+ echo "TIKA_PID_DIR=\"$TIKA_VAR_DIR\"
+LOG4J_PROPS=\"$TIKA_VAR_DIR/log4j.properties\"
+TIKA_LOGS_DIR=\"$TIKA_VAR_DIR/logs\"
+TIKA_PORT=\"$TIKA_PORT\"
+TIKA_SPAWN_CHILD_OPTS=\"$TIKA_SPAWN_CHILD_OPTS\"
+" >> "/etc/default/$TIKA_SERVICE.in.sh"
+fi
+chown root:${TIKA_USER} "/etc/default/$TIKA_SERVICE.in.sh"
+chmod 0640 "/etc/default/$TIKA_SERVICE.in.sh"
+
+# install data directories and files
+mkdir -p "$TIKA_VAR_DIR/logs"
+chown -R "$TIKA_USER:" "$TIKA_VAR_DIR"
+find "$TIKA_VAR_DIR" -type d -print0 | xargs -0 chmod 0750
+find "$TIKA_VAR_DIR" -type f -print0 | xargs -0 chmod 0640 # currently no files exist in /var/tika
+
+# configure autostart of service
+if [[ "$distro" == "RedHat" || "$distro" == "CentOS" || "$distro" == "SUSE" ]]; then
+ chkconfig "$TIKA_SERVICE" on
+else
+ update-rc.d "$TIKA_SERVICE" defaults
+fi
+echo "Service $TIKA_SERVICE installed."
+echo "Customize Tika startup configuration in /etc/default/$TIKA_SERVICE.in.sh"
+
+# start service
+if [[ $TIKA_START == "true" ]] ; then
+ service "$TIKA_SERVICE" start
+ sleep 5
+ service "$TIKA_SERVICE" status
+else
+ echo "Not starting Tika service (option -n given). Start manually with 'service $TIKA_SERVICE start'"
+fi
diff --git a/tika-server/bin/tika b/tika-server/bin/tika
new file mode 100755
index 0000000..e8c6e02
--- /dev/null
+++ b/tika-server/bin/tika
@@ -0,0 +1,571 @@
+#!/usr/bin/env bash
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+TIKA_SCRIPT="$0"
+verbose=false
+THIS_OS=`uname -s`
+
+# What version of Java is required to run this version of Tika.
+JAVA_VER_REQ="8"
+
+stop_all=false
+
+# for now, we don't support running this script from cygwin due to problems
+# like not having lsof, ps auxww, curl, and awkward directory handling
+if [ "${THIS_OS:0:6}" == "CYGWIN" ]; then
+ echo -e "This script does not support cygwin due to severe limitations and lack of adherence\nto BASH standards, such as lack of lsof, curl, and ps options."
+ exit 1
+fi
+
+# Resolve symlinks to this script
+while [ -h "$TIKA_SCRIPT" ] ; do
+ ls=`ls -ld "$TIKA_SCRIPT"`
+ # Drop everything prior to ->
+ link=`expr "$ls" : '.*-> \(.*\)$'`
+ if expr "$link" : '/.*' > /dev/null; then
+ TIKA_SCRIPT="$link"
+ else
+ TIKA_SCRIPT=`dirname "$TIKA_SCRIPT"`/"$link"
+ fi
+done
+
+TIKA_TIP=`dirname "$TIKA_SCRIPT"`/..
+TIKA_TIP=`cd "$TIKA_TIP"; pwd`
+DEFAULT_SERVER_DIR="$TIKA_TIP/"
+
+# If an include wasn't specified in the environment, then search for one...
+if [ -z "$TIKA_INCLUDE" ]; then
+ # Locations (in order) to use when searching for an include file.
+ for include in "`dirname "$0"`/tika.in.sh" \
+ "$HOME/.tika.in.sh" \
+ /usr/share/tika/tika.in.sh \
+ /usr/local/share/tika/tika.in.sh \
+ /etc/default/tika.in.sh \
+ /opt/tika/tika.in.sh; do
+ if [ -r "$include" ]; then
+ TIKA_INCLUDE="$include"
+ . "$include"
+ break
+ fi
+ done
+elif [ -r "$TIKA_INCLUDE" ]; then
+ . "$TIKA_INCLUDE"
+fi
+
+if [ -z "$TIKA_PID_DIR" ]; then
+ TIKA_PID_DIR="$TIKA_TIP/bin"
+fi
+
+
+
+
+
+echo "Default server $DEFAULT_SERVER_DIR"
+
+if [ -n "$TIKA_JAVA_HOME" ]; then
+ JAVA="$TIKA_JAVA_HOME/bin/java"
+elif [ -n "$JAVA_HOME" ]; then
+ for java in "$JAVA_HOME"/bin/amd64/java "$JAVA_HOME"/bin/java; do
+ if [ -x "$java" ]; then
+ JAVA="$java"
+ break
+ fi
+ done
+ if [ -z "$JAVA" ]; then
+ echo >&2 "The currently defined JAVA_HOME ($JAVA_HOME) refers"
+ echo >&2 "to a location where Java could not be found. Aborting."
+ echo >&2 "Either fix the JAVA_HOME variable or remove it from the"
+ echo >&2 "environment so that the system PATH will be searched."
+ exit 1
+ fi
+else
+ JAVA=java
+fi
+
+if [ -z "$TIKA_STOP_WAIT" ]; then
+ TIKA_STOP_WAIT=180
+fi
+# test that Java exists, is executable and correct version
+JAVA_VER=$("$JAVA" -version 2>&1)
+if [[ $? -ne 0 ]] ; then
+ echo >&2 "Java not found, or an error was encountered when running java."
+ echo >&2 "A working Java $JAVA_VER_REQ JRE is required to run Tika!"
+ echo >&2 "Please install latest version of Java $JAVA_VER_REQ or set JAVA_HOME properly."
+ echo >&2 "Command that we tried: '${JAVA} -version', with response:"
+ echo >&2 "${JAVA_VER}"
+ echo >&2
+ echo >&2 "Debug information:"
+ echo >&2 "JAVA_HOME: ${JAVA_HOME:-N/A}"
+ echo >&2 "Active Path:"
+ echo >&2 "${PATH}"
+ exit 1
+else
+ JAVA_VER_NUM=$(echo $JAVA_VER | head -1 | awk -F '"' '/version/ {print $2}' | sed -e's/^1\.//' | sed -e's/[._-].*$//')
+ if [[ "$JAVA_VER_NUM" -lt "$JAVA_VER_REQ" ]] ; then
+ echo >&2 "Your current version of Java is too old to run this version of Tika."
+ echo >&2 "We found major version $JAVA_VER_NUM, using command '${JAVA} -version', with response:"
+ echo >&2 "${JAVA_VER}"
+ echo >&2
+ echo >&2 "Please install latest version of Java $JAVA_VER_REQ or set JAVA_HOME properly."
+ echo >&2
+ echo >&2 "Debug information:"
+ echo >&2 "JAVA_HOME: ${JAVA_HOME:-N/A}"
+ echo >&2 "Active Path:"
+ echo >&2 "${PATH}"
+ exit 1
+ fi
+ JAVA_VENDOR="Oracle"
+ if [ "`echo $JAVA_VER | grep -i "IBM J9"`" != "" ]; then
+ JAVA_VENDOR="IBM J9"
+ fi
+fi
+
+
+function print_usage() {
+ CMD="$1"
+ ERROR_MSG="$2"
+
+ if [ "$ERROR_MSG" != "" ]; then
+ echo -e "\nERROR: $ERROR_MSG\n"
+ fi
+
+ if [ -z "$CMD" ]; then
+ echo ""
+ echo "Usage: tika COMMAND OPTIONS"
+ echo " where COMMAND is one of: start, stop, status"
+ echo ""
+ echo " Standalone server example (start Tika running in the background on port 9998):"
+ echo ""
+ echo " ./tika start -p 9998"
+ echo ""
+ echo "Pass -help after any COMMAND to see command-specific usage information,"
+ echo " such as: ./tika start -help or ./tika stop -help"
+ echo ""
+ elif [ "$CMD" == "start" ]; then
+ echo ""
+ echo "Usage: tika $CMD [-f] [-h hostname] [-p port] [-d directory] [-V]"
+ echo ""
+ echo " -f Start Tika in foreground; default starts Tika in the background"
+ echo " and sends stdout / stderr to tika-PORT-console.log"
+ echo ""
+ echo " -p <port> Specify the port to start the Tika HTTP listener on; default is 9998"
+ echo ""
+ echo " -d Specify the Tika server directory; defaults to ../"
+ echo ""
+ echo " -j/--jar Specify the tika-server.jar; defaults to tika-server.jar"
+ echo ""
+ echo " -V/--verbose Verbose messages from this script"
+ echo ""
+
+ elif [ "$CMD" == "stop" ]; then
+ echo ""
+ echo "Usage: tika stop [-p port] [-V]"
+ echo ""
+ echo " -p <port> Specify the port the Tika HTTP listener is bound to"
+ echo ""
+ echo " --all Find and stop all running Tika servers on this host"
+ echo ""
+ echo " -V/--verbose Verbose messages from this script"
+ echo ""
+ echo " NOTE: To see if any Tika servers are running, do: tika status"
+ echo ""
+ elif [ "$CMD" == "status" ]; then
+ echo ""
+ echo "Usage: tika status"
+ echo ""
+ echo " This command will show the status of all running Tika servers."
+ echo " It can only detect those Tika servers running on the current host."
+ echo ""
+ fi
+} # end print_usage
+
+# Shows a rotating spinner for as long as the process with pid $1 appears in the
+# `ps aux` listing, so the user can see the script is still waiting on work.
+function spinner() {
+  local watched=$1
+  local pause=0.5
+  local frames='|/-\'
+  local rest
+  while [[ -n "$(ps aux | awk '{print $2}' | grep -w $watched)" ]]; do
+    # rotate: draw the first character, then move it to the end of the string
+    rest=${frames#?}
+    printf " [%c] " "$frames"
+    frames=$rest${frames%"$rest"}
+    sleep $pause
+    printf "\b\b\b\b\b\b"
+  done
+  printf " \b\b\b\b"
+}
+
+# Stops a Tika process with kill -9 and removes the pid file for $TIKA_PORT.
+#   $1 - server directory (stored in DIR; not otherwise used here)
+#   $2 - pid of the Tika process to kill
+# Exits the script with status 1 when no pid is given or the process is still
+# listed by ps one second after the kill.
+function stop_tika() {
+
+  DIR="$1"
+  TIKA_PID="$2"
+
+  if [[ -z "$TIKA_PID" ]]; then
+    echo "ERROR: No PID found for Tika running on port $TIKA_PORT ... script fails."
+    exit 1
+  fi
+
+  echo "Sending kill command to Tika running on port $TIKA_PORT with process $TIKA_PID"
+  kill -9 $TIKA_PID
+  rm -f "$TIKA_PID_DIR/tika-$TIKA_PORT.pid"
+  sleep 1
+
+  # confirm the pid no longer shows up in the process table
+  CHECK_PID=$(ps auxww | awk '{print $2}' | grep -w $TIKA_PID | sort -r | tr -d ' ')
+  if [[ -n "$CHECK_PID" ]]; then
+    echo "ERROR: Failed to kill previous Tika Java process $TIKA_PID ... script fails."
+    exit 1
+  fi
+} # end stop_tika
+
+
+if [ $# -eq 1 ]; then
+ case $1 in
+ -help|-usage|-h|--help)
+ print_usage ""
+ exit
+ ;;
+ -info|-i|status)
+ #get_info
+ echo "To be done"
+ exit $?
+ ;;
+ esac
+fi
+
+if [ $# -gt 0 ]; then
+ # if first arg starts with a dash (and it's not -help or -info),
+ # then assume they are starting Tika, such as: tika -f
+ if [[ $1 == -* ]]; then
+ SCRIPT_CMD="start"
+ else
+ SCRIPT_CMD="$1"
+ shift
+ fi
+else
+ # no args - just show usage and exit
+ print_usage ""
+ exit
+fi
+
+# verify the command given is supported
+if [ "$SCRIPT_CMD" != "stop" ] && [ "$SCRIPT_CMD" != "start" ]; then
+ print_usage "" "$SCRIPT_CMD is not a valid command!"
+ exit 1
+fi
+
+
+# Run in foreground (default is to run in the background)
+FG="false"
+
+if [ $# -gt 0 ]; then
+ while true; do
+ case "$1" in
+ -d|-dir)
+ if [[ -z "$2" || "${2:0:1}" == "-" ]]; then
+ print_usage "$SCRIPT_CMD" "Server directory is required when using the $1 option!"
+ exit 1
+ fi
+
+ if [[ "$2" == "." || "$2" == "./" || "$2" == ".." || "$2" == "../" ]]; then
+ TIKA_SERVER_DIR="$(pwd)/$2"
+ else
+ # see if the arg value is relative to the tip vs full path
+ if [[ "$2" != /* ]] && [[ -d "$TIKA_TIP/$2" ]]; then
+ TIKA_SERVER_DIR="$TIKA_TIP/$2"
+ else
+ TIKA_SERVER_DIR="$2"
+ fi
+ fi
+ # resolve it to an absolute path
+ TIKA_SERVER_DIR="$(cd "$TIKA_SERVER_DIR"; pwd)"
+ shift 2
+ ;;
+
+ -f|-foreground)
+ FG="true"
+ shift
+ ;;
+ -p|-port)
+ if [[ -z "$2" || "${2:0:1}" == "-" ]]; then
+ print_usage "$SCRIPT_CMD" "Port number is required when using the $1 option!"
+ exit 1
+ fi
+ TIKA_PORT="$2"
+ shift 2
+ ;;
+ -h|-host)
+ if [[ -z "$2" || "${2:0:1}" == "-" ]]; then
+ print_usage "$SCRIPT_CMD" "Hostname is required when using the $1 option!"
+ exit 1
+ fi
+ TIKA_HOST="$2"
+ shift 2
+ ;;
+ -j|--jar)
+ TIKA_SERVER_JAR="$2"
+ shift 2
+ ;;
+ -help|-usage)
+ print_usage "$SCRIPT_CMD"
+ exit 0
+ ;;
+ -V|--verbose)
+ verbose=true
+ shift
+ ;;
+ --all)
+ stop_all=true
+ shift
+ ;;
+ --)
+ shift
+ break
+ ;;
+ *)
+ if [ "${1:0:2}" == "-D" ]; then
+ # pass thru any opts that begin with -D (java system props)
+ TIKA_OPTS+=("$1")
+ PASS_TO_RUN_EXAMPLE+=" $1"
+ shift
+ else
+ if [ "$1" != "" ]; then
+ print_usage "$SCRIPT_CMD" "$1 is not supported by this script"
+ exit 1
+ else
+ break # out-of-args, stop looping
+ fi
+ fi
+ ;;
+ esac
+ done
+fi
+
+if [ -z "$TIKA_SPAWN_CHILD_OPTS" ]; then
+ TIKA_SPAWN_CHILD_OPTS="-spawnChild"
+fi
+
+if [ -z "$TIKA_SERVER_DIR" ]; then
+ TIKA_SERVER_DIR="$DEFAULT_SERVER_DIR"
+fi
+
+if [ ! -e "$TIKA_SERVER_DIR" ]; then
+ echo -e "\nTika server directory $TIKA_SERVER_DIR not found!\n"
+ exit 1
+fi
+
+############# start/stop logic below here ################
+
+if $verbose ; then
+ echo "Using Tika root directory: $TIKA_TIP"
+ echo "Using Java: $JAVA"
+ "$JAVA" -version
+fi
+
+# stop all if no port specified
+if [[ "$SCRIPT_CMD" == "stop" ]]; then
+ if $stop_all; then
+ none_stopped=true
+ find "$TIKA_PID_DIR" -name "tika-*.pid" -type f | while read PIDF
+ do
+ NEXT_PID=`cat "$PIDF"`
+ echo "About to tika port with $NEXT_PID"
+ stop_tika "$TIKA_SERVER_DIR" "$NEXT_PID"
+ none_stopped=false
+ rm -f "$PIDF"
+ done
+ # TODO: none_stopped doesn't get reflected across the subshell
+ # This can be uncommented once we find a clean way out of it
+ # if $none_stopped; then
+ # echo -e "\nNo Tika servers found to stop.\n"
+ # fi
+ else
+ # not stopping all and don't have a port, but if we can find the pid file for the default port 9998, then use that
+ none_stopped=true
+ numTikas=`find "$TIKA_PID_DIR" -name "tika-*.pid" -type f | wc -l | tr -d ' '`
+ if [ -z "$TIKA_PORT" ]; then
+ if [ $numTikas -eq 1 ]; then
+ # only do this if there is only 1 server running, otherwise they must provide the -p or --all
+ PID="$(cat "$(find "$TIKA_PID_DIR" -name "tika-*.pid" -type f)")"
+ CHECK_PID=`ps auxww | awk '{print $2}' | grep -w $PID | sort -r | tr -d ' '`
+ if [ "$CHECK_PID" != "" ]; then
+ stop_tika "$TIKA_SERVER_DIR" "$CHECK_PID"
+ none_stopped=false
+ fi
+ fi
+ else
+ PID="$(cat "$(find "$TIKA_PID_DIR" -name "tika-$TIKA_PORT.pid" -type f)")"
+ stop_tika "$TIKA_SERVER_DIR" "$PID"
+ none_stopped=false
+ fi
+
+ if $none_stopped; then
+ if [ $numTikas -gt 0 ]; then
+ echo -e "\nFound $numTikas Tika servers running! Must either specify a port using -p or --all to stop all Tika servers on this host.\n"
+ else
+ echo -e "\nNo Tika servers found to stop.\n"
+ fi
+ exit 1
+ fi
+ fi
+ exit
+fi
+
+
+if [ -z "$TIKA_PORT" ]; then
+ TIKA_PORT=9998
+fi
+
+if [ -z "$TIKA_HOST" ]; then
+ TIKA_HOST='0.0.0.0'
+fi
+
+if [ -z "$TIKA_SERVER_JAR" ]; then
+ TIKA_SERVER_JAR=tika-server.jar
+fi
+
+if [ -z "$TIKA_LOGS_DIR" ]; then
+ TIKA_LOGS_DIR="$TIKA_SERVER_DIR/logs"
+fi
+
+# Launches Tika in foreground/background depending on parameters
+#   $1 - "true" to run in the foreground (the shell is replaced via exec);
+#        any other value starts Tika in the background with nohup
+#   $2 - additional arguments placed before -jar on the java command line
+#        (expanded unquoted, so the value is word-split)
+# Reads the globals JAVA, TIKA_SERVER_DIR, TIKA_SERVER_JAR, TIKA_HOST, TIKA_PORT,
+# TIKA_LOGS_DIR, TIKA_PID_DIR, TIKA_STOP_WAIT and TIKA_SPAWN_CHILD_OPTS.
+# Exits with status 1 when the jar is missing or the logs directory is unusable.
+function start_tika() {
+
+ run_in_foreground="$1"
+ TIKA_ADDL_ARGS="$2"
+
+ # define default GC_TUNE
+ # (${GC_TUNE+x} is empty only when GC_TUNE is unset; a value that is set is
+ # split on whitespace into an array instead)
+ if [ -z ${GC_TUNE+x} ]; then
+ GC_TUNE=('-XX:+UseG1GC' \
+ '-XX:+PerfDisableSharedMem' \
+ '-XX:+ParallelRefProcEnabled' \
+ '-XX:MaxGCPauseMillis=250' \
+ '-XX:+UseLargePages' \
+ '-XX:+AlwaysPreTouch')
+ else
+ GC_TUNE=($GC_TUNE)
+ fi
+
+ # with -V/--verbose, echo the effective settings before launching
+ if $verbose ; then
+ echo -e "\nStarting Tika using the following settings:"
+ echo -e " JAVA = $JAVA"
+ echo -e " TIKA_SERVER_DIR = $TIKA_SERVER_DIR"
+ echo -e " TIKA_SERVER_JAR = $TIKA_SERVER_JAR"
+ echo -e " TIKA_HOST = $TIKA_HOST"
+ echo -e " TIKA_PORT = $TIKA_PORT"
+ echo -e " JAVA_MEM_OPTS = ${JAVA_MEM_OPTS[@]}"
+ echo -e " GC_TUNE = ${GC_TUNE[@]}"
+ echo -e " GC_LOG_OPTS = ${GC_LOG_OPTS[@]}"
+ echo -e " TIKA_SPAWN_CHILD_OPTS = $TIKA_SPAWN_CHILD_OPTS"
+
+ if [ "$TIKA_OPTS" != "" ]; then
+ echo -e " TIKA_OPTS = ${TIKA_OPTS[@]}"
+ fi
+
+ if [ "$TIKA_ADDL_ARGS" != "" ]; then
+ echo -e " TIKA_ADDL_ARGS = $TIKA_ADDL_ARGS"
+ fi
+
+ if [ "$TIKA_DATA_HOME" != "" ]; then
+ echo -e " TIKA_DATA_HOME = $TIKA_DATA_HOME"
+ fi
+
+ echo -e "\n"
+ fi
+
+ # need to launch tika from the server dir
+ cd "$TIKA_SERVER_DIR"
+ if [ ! -e "$TIKA_SERVER_DIR/$TIKA_SERVER_JAR" ]; then
+ echo -e "\nERROR: $TIKA_SERVER_JAR file not found in $TIKA_SERVER_DIR\nPlease check your -d and -j parameters to set the correct Tika server directory and jar.\n"
+ exit 1
+ fi
+
+ # JVM options placed before -jar.
+ # NOTE(review): JAVA_MEM_OPTS, GC_LOG_OPTS, TIKA_HOST_ARG and LOG4J_CONFIG are
+ # never assigned in this script, so they are empty unless the include file or
+ # the environment defines them. The installer writes LOG4J_PROPS, which nothing
+ # here reads - confirm whether it was meant to feed LOG4J_CONFIG.
+ TIKA_START_OPTS=('-server' "${JAVA_MEM_OPTS[@]}" "${GC_TUNE[@]}" "${GC_LOG_OPTS[@]}" \
+ "${TIKA_HOST_ARG[@]}" \
+ "${LOG4J_CONFIG[@]}" "${TIKA_OPTS[@]}")
+
+ mkdir -p "$TIKA_LOGS_DIR" 2>/dev/null
+ if [ $? -ne 0 ]; then
+ echo -e "\nERROR: Logs directory $TIKA_LOGS_DIR could not be created. Exiting"
+ exit 1
+ fi
+ if [ ! -w "$TIKA_LOGS_DIR" ]; then
+ echo -e "\nERROR: Logs directory $TIKA_LOGS_DIR is not writable. Exiting"
+ exit 1
+ fi
+ # reject a logs directory whose whole value is one of these bare names
+ case "$TIKA_LOGS_DIR" in
+ contexts|etc|lib|modules|resources|scripts|tika|tika-webapp)
+ echo -e "\nERROR: Logs directory $TIKA_LOGS_DIR is invalid. Reserved for the system. Exiting"
+ exit 1
+ ;;
+ esac
+
+
+ if [ "$run_in_foreground" == "true" ]; then
+ if $verbose ; then
+ echo "Startup command"
+ echo "$JAVA ${TIKA_START_OPTS[@]} $TIKA_ADDL_ARGS -jar $TIKA_SERVER_JAR -p $TIKA_PORT -h $TIKA_HOST $TIKA_SPAWN_CHILD_OPTS"
+ fi
+ # exec replaces this shell with the JVM, so nothing below runs in this mode
+ exec "$JAVA" "${TIKA_START_OPTS[@]}" $TIKA_ADDL_ARGS -jar $TIKA_SERVER_JAR -p $TIKA_PORT -h $TIKA_HOST $TIKA_SPAWN_CHILD_OPTS
+ else
+ # run Tika in the background
+ if $verbose ; then
+ echo "Startup command"
+ echo "$JAVA ${TIKA_START_OPTS[@]} $TIKA_ADDL_ARGS -jar $TIKA_SERVER_JAR -p $TIKA_PORT -h $TIKA_HOST $TIKA_SPAWN_CHILD_OPTS $TIKA_LOGS_DIR/tika-$TIKA_PORT-console.log $TIKA_PID_DIR/tika-$TIKA_PORT.pid"
+ fi
+ # stdout and stderr go to the console log; $! (the pid of the background
+ # java process) is written to the pid file that the stop command reads
+ nohup "$JAVA" "${TIKA_START_OPTS[@]}" $TIKA_ADDL_ARGS \
+ -jar $TIKA_SERVER_JAR -p $TIKA_PORT -h $TIKA_HOST $TIKA_SPAWN_CHILD_OPTS \
+ 1>"$TIKA_LOGS_DIR/tika-$TIKA_PORT-console.log" 2>&1 & echo $! > "$TIKA_PID_DIR/tika-$TIKA_PORT.pid"
+
+ # no lsof on cygwin though
+ if hash lsof 2>/dev/null ; then # hash returns true if lsof is on the path
+ echo -n "Waiting up to $TIKA_STOP_WAIT seconds to see Tika running on port $TIKA_PORT"
+ # Launch in a subshell to show the spinner
+ # The subshell polls every 2 seconds until something listens on the port or
+ # TIKA_STOP_WAIT seconds have passed; on timeout it prints the last 30 lines
+ # of the console log.
+ (loops=0
+ while true
+ do
+ running=`lsof -PniTCP:$TIKA_PORT -sTCP:LISTEN`
+ if [ -z "$running" ]; then
+ slept=$((loops * 2))
+ if [ $slept -lt $TIKA_STOP_WAIT ]; then
+ sleep 2
+ loops=$[$loops+1]
+ else
+ echo -e "Still not seeing Tika listening on $TIKA_PORT after $TIKA_STOP_WAIT seconds!"
+ tail -30 "$TIKA_LOGS_DIR/tika-$TIKA_PORT-console.log"
+ exit # subshell!
+ fi
+ else
+ # look the pid up from the process list (excluding "-child" processes) for the message
+ TIKA_PID=`ps auxww | grep tika\-server | grep -w "\-p $TIKA_PORT" | grep -v "\-child" | grep -v grep | awk '{print $2}' | sort -r`
+ echo -e "\nStarted Tika server on port $TIKA_PORT (pid=$TIKA_PID). Happy extracting!\n"
+ exit # subshell!
+ fi
+ done) &
+ spinner $!
+ else
+ # without lsof the port cannot be probed: wait a fixed 10 seconds and report
+ echo -e "NOTE: Please install lsof as this script needs it to determine if Tika is listening on port $TIKA_PORT."
+ sleep 10
+ TIKA_PID=`ps auxww | grep tika\-server | grep -w "\-p $TIKA_PORT" | grep -v "\-child" | grep -v grep | awk '{print $2}' | sort -r`
+ echo -e "\nStarted Tika server on port $TIKA_PORT (pid=$TIKA_PID). Happy extracting!\n"
+ return;
+ fi
+ fi
+} # end start_tika
+
+
+
+if [[ "$SCRIPT_CMD" == "start" ]]; then
+ start_tika "$FG" "$ADDITIONAL_CMD_OPTS"
+ exit 1
+fi
diff --git a/tika-server/bin/tika.in.sh b/tika-server/bin/tika.in.sh
new file mode 100755
index 0000000..687ceae
--- /dev/null
+++ b/tika-server/bin/tika.in.sh
@@ -0,0 +1,79 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Settings here will override settings in existing env vars or in bin/tika. The default shipped state
+# of this file is completely commented.
+
+# By default the script will use JAVA_HOME to determine which java
+# to use, but you can set a specific path for Tika to use without
+# affecting other Java applications on your server/workstation.
+#TIKA_JAVA_HOME=""
+
+# This controls the number of seconds that the Tika script will wait to see
+# Tika listening on its port after a start (the variable name says STOP, but the start wait uses it too).
+# If Tika is not up by then, the script gives up waiting and displays the last lines of the console log.
+#TIKA_STOP_WAIT="180"
+
+# Enable verbose GC logging...
+# * If this is unset, various default options will be selected depending on which JVM version is in use
+# * For Java 8: if this is set, additional params will be added to specify the log file & rotation
+# * For Java 9 or higher: each included opt param that starts with '-Xlog:gc', but does not include an
+# output specifier, will have a 'file' output specifier (as well as formatting & rollover options)
+# appended, using the effective value of the TIKA_LOGS_DIR.
+#
+#GC_LOG_OPTS='-Xlog:gc*' # (Java 9+)
+#GC_LOG_OPTS="-verbose:gc -XX:+PrintHeapAtGC -XX:+PrintGCDetails \
+# -XX:+PrintGCDateStamps -XX:+PrintGCTimeStamps -XX:+PrintTenuringDistribution -XX:+PrintGCApplicationStoppedTime"
+
+# These GC settings have shown to work well for a number of common Solr workloads. Good for Tika?
+#GC_TUNE=" \
+#-XX:SurvivorRatio=4 \
+#-XX:TargetSurvivorRatio=90 \
+#-XX:MaxTenuringThreshold=8 \
+#-XX:+UseConcMarkSweepGC \
+#-XX:ConcGCThreads=4 -XX:ParallelGCThreads=4 \
+#-XX:+CMSScavengeBeforeRemark \
+#-XX:PretenureSizeThreshold=64m \
+#-XX:+UseCMSInitiatingOccupancyOnly \
+#-XX:CMSInitiatingOccupancyFraction=50 \
+#-XX:CMSMaxAbortablePrecleanTime=6000 \
+#-XX:+CMSParallelRemarkEnabled \
+#-XX:+ParallelRefProcEnabled \
+#-XX:-OmitStackTraceInFastThrow" # etc.
+
+# Anything you add to the TIKA_OPTS variable will be included in the java
+# start command line as-is, in ADDITION to other options. If you specify the
+# -a option on start script, those options will be appended as well. Examples:
+#TIKA_OPTS="$TIKA_OPTS -Dlog4j.configuration=file:log4j_server.xml"
+
+# Location where the bin/tika script will save PID files for running instances
+# If not set, the script will create PID files in /var/tika
+#TIKA_PID_DIR=
+
+# Tika provides a default Log4J configuration properties file in tika-server.jar;
+# however, you may want to customize the log settings and file appender location,
+# so you can point the script to a different log4j.properties file.
+#LOG4J_PROPS=/var/tika/log4j.properties
+
+# Location where Tika should write logs to.
+#TIKA_LOGS_DIR=/var/tika/logs
+
+# Sets the port Tika binds to, default is 9998
+#TIKA_PORT=9998
+
+# Tika Server has a number of options to make it more robust to OOMs, Infinite
+# Loops, and Memory Leaks via the -spawnChild parameter. Learn more at
+# http://wiki.apache.org/tika/TikaJAXRS
+#TIKA_SPAWN_CHILD_OPTS="-spawnChild -maxFiles 100000 -JXmx4g"
diff --git a/tika-server/pom.xml b/tika-server/pom.xml
index fb377e4..90772fc 100644
--- a/tika-server/pom.xml
+++ b/tika-server/pom.xml
@@ -305,6 +305,24 @@
</excludes>
</configuration>
</plugin>
+ <plugin> <!-- packages tika-server using the assembly descriptor listed below -->
+ <artifactId>maven-assembly-plugin</artifactId>
+ <version>3.2.0</version>
+ <configuration>
+ <descriptors>
+ <descriptor>assembly.xml</descriptor> <!-- tika-server/assembly.xml, added alongside this pom.xml in the same commit -->
+ </descriptors>
+ </configuration>
+ <executions>
+ <execution>
+ <id>make-assembly</id> <!-- execution id; used to match this execution when POMs are merged through inheritance -->
+ <phase>package</phase> <!-- bind this execution to the package lifecycle phase -->
+ <goals>
+ <goal>single</goal> <!-- the assembly plugin goal run by this execution -->
+ </goals>
+ </execution>
+ </executions>
+ </plugin>
</plugins>
</build>
<profiles>