You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2020/02/04 23:15:05 UTC

[tika] branch branch_1x updated: TIKA-3010 Install and run Tika-Server as a Service (#305)

This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch branch_1x
in repository https://gitbox.apache.org/repos/asf/tika.git


The following commit(s) were added to refs/heads/branch_1x by this push:
     new 6c62b22  TIKA-3010 Install and run Tika-Server as a Service (#305)
6c62b22 is described below

commit 6c62b22a19b51273009875b6a925322bea02c300
Author: Eric Pugh <ep...@opensourceconnections.com>
AuthorDate: Tue Feb 4 18:07:55 2020 -0500

    TIKA-3010 Install and run Tika-Server as a Service (#305)
    
    * wip, got start command working
    
    * starting and writing pid out
    
    * able to stop tika-servers
    
    * stop commands working
    
    * rename to just tika
    
    * stub out status
    
    * package up tika-server as a package with the bin directory and the tika-server jar file
    
    * install script is installing into /opt/tika, errors on /opt/tika//logs
    
    * service tika start and service tika stop work
    
    * remove solr specific feature
    
    * this is a full path
    
    * no support for restart, only start and stop
    
    * fix name of binary install
    
    * swap to log4j.properties path
    
    * simplify logging configuration
    
    * introduce a spawnChild option
    
    * refer to Tika
    
    * add default option
    
    * document how to run install script
---
 tika-server/README.md                   |  15 +-
 tika-server/assembly.xml                |  30 ++
 tika-server/bin/init.d/tika             |  78 +++++
 tika-server/bin/install_tika_service.sh | 367 ++++++++++++++++++++
 tika-server/bin/tika                    | 571 ++++++++++++++++++++++++++++++++
 tika-server/bin/tika.in.sh              |  79 +++++
 tika-server/pom.xml                     |  18 +
 7 files changed, 1157 insertions(+), 1 deletion(-)

diff --git a/tika-server/README.md b/tika-server/README.md
index 0b451a9..75aef57 100644
--- a/tika-server/README.md
+++ b/tika-server/README.md
@@ -31,6 +31,20 @@ This will load Apache Tika Server and expose its interface on:
 
 `http://localhost:9998`
 
+Installing as a Service on Linux
+-----------------------
+To run as a service on Linux you need to run the `install_tika_service.sh` script.
+
+Assuming you have the binary distribution like `tika-server-2.0.0-SNAPSHOT-bin.tgz`,
+then you can extract the install script via:
+
+`tar xzf tika-server-2.0.0-SNAPSHOT-bin.tgz tika-server-2.0.0-SNAPSHOT-bin/bin/install_tika_service.sh --strip-components=2`
+
+and then run the installation process via:
+
+`./install_tika_service.sh  ./tika-server-2.0.0-SNAPSHOT-bin.tgz`
+
+
 Usage
 -----
 Usage examples from command line with `curl` utility:
@@ -58,4 +72,3 @@ HTTP Return Codes
 `415` - Unknown file type  
 `422` - Unparsable document of known type (password protected documents and unsupported versions like Biff5 Excel)  
 `500` - Internal error  
-
diff --git a/tika-server/assembly.xml b/tika-server/assembly.xml
new file mode 100644
index 0000000..412a9a2
--- /dev/null
+++ b/tika-server/assembly.xml
@@ -0,0 +1,30 @@
+<assembly xmlns="http://maven.apache.org/ASSEMBLY/2.0.0"
+  xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+  xsi:schemaLocation="http://maven.apache.org/ASSEMBLY/2.0.0 http://maven.apache.org/xsd/assembly-2.0.0.xsd">
+  <id>bin</id>
+  <baseDirectory>${project.build.finalName}-bin</baseDirectory>
+  <formats>
+    <format>tgz</format>
+    <format>zip</format>
+  </formats>
+  <fileSets>
+    <fileSet>
+      <directory>${project.basedir}</directory>
+      <outputDirectory></outputDirectory>
+      <includes>
+        <include>README*</include>
+      </includes>
+    </fileSet>
+    <fileSet>
+      <directory>${project.basedir}/bin</directory>
+      <outputDirectory>bin</outputDirectory>
+    </fileSet>
+  </fileSets>
+  <files>
+    <file>
+      <source>${project.build.directory}/tika-server-${project.version}.jar</source>
+      <outputDirectory/>
+      <destName>tika-server.jar</destName>
+    </file>
+  </files>
+</assembly>
diff --git a/tika-server/bin/init.d/tika b/tika-server/bin/init.d/tika
new file mode 100755
index 0000000..15bb29e
--- /dev/null
+++ b/tika-server/bin/init.d/tika
@@ -0,0 +1,78 @@
+#!/bin/sh
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+### BEGIN INIT INFO
+# Provides:          tika
+# Required-Start:    $remote_fs $syslog
+# Required-Stop:     $remote_fs $syslog
+# Default-Start:     2 3 4 5
+# Default-Stop:      0 1 6
+# Description:       Controls Apache Tika as a Service
+### END INIT INFO
+
+# Example of a very simple *nix init script that delegates commands to the bin/tika script
+# Typical usage is to do:
+#
+#   cp bin/init.d/tika /etc/init.d/tika
+#   chmod 755 /etc/init.d/tika
+#   chown root:root /etc/init.d/tika
+#   update-rc.d tika defaults
+#   update-rc.d tika enable
+
+# Where you extracted the Tika distribution bundle
+TIKA_INSTALL_DIR="/opt/tika"
+
+if [ ! -d "$TIKA_INSTALL_DIR" ]; then
+  echo "$TIKA_INSTALL_DIR not found! Please check the TIKA_INSTALL_DIR setting in your $0 script."
+  exit 1
+fi
+
+# Path to an include file that defines environment specific settings to override default
+# variables used by the bin/tika script. It's highly recommended to define this script so
+# that you can keep the Tika binary files separated from live files (pid, logs, etc)
+# see bin/tika.in.sh for an example
+TIKA_ENV="/etc/default/tika.in.sh"
+
+if [ ! -f "$TIKA_ENV" ]; then
+  echo "$TIKA_ENV not found! Please check the TIKA_ENV setting in your $0 script."
+  exit 1
+fi
+
+# Specify the user to run Tika as; if not set, then Tika will run as root.
+# Running Tika as root is not recommended for production environments
+RUNAS="tika"
+
+# verify the specified run as user exists; an empty RUNAS skips the check so that
+# Tika is started directly, without su, by the branch at the end of this script
+if [ -n "$RUNAS" ] && ! id -u "$RUNAS" >/dev/null; then
+  echo "User $RUNAS not found! Please create the $RUNAS user before running this script."
+  exit 1
+fi
+
+case "$1" in  # only the actions listed below are forwarded to bin/tika
+  start|stop|status)
+    TIKA_CMD="$1"
+    ;;
+  *)
+    echo "Usage: $0 {start|stop|status}" >&2
+    exit 2  # LSB init script convention: 2 = invalid or excess argument(s)
+esac
+
+if [ -n "$RUNAS" ]; then
+  su -c "TIKA_INCLUDE=\"$TIKA_ENV\" \"$TIKA_INSTALL_DIR/bin/tika\" $TIKA_CMD" - "$RUNAS"
+else
+  TIKA_INCLUDE="$TIKA_ENV" "$TIKA_INSTALL_DIR/bin/tika" "$TIKA_CMD"
+fi
diff --git a/tika-server/bin/install_tika_service.sh b/tika-server/bin/install_tika_service.sh
new file mode 100755
index 0000000..b414d32
--- /dev/null
+++ b/tika-server/bin/install_tika_service.sh
@@ -0,0 +1,367 @@
+#!/usr/bin/env bash
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+if [[ $EUID -ne 0 ]]; then
+  echo -e "\nERROR: This script must be run as root\n" 1>&2
+  exit 1
+fi
+
+print_usage() {  # Print the optional error message $1 to stderr, then the installer usage text to stdout.
+  local ERROR_MSG="$1"
+
+  if [ "$ERROR_MSG" != "" ]; then
+    echo -e "\nERROR: $ERROR_MSG\n" 1>&2
+  fi
+
+  echo ""
+  echo "Usage: install_tika_service.sh <path_to_tika_distribution_archive> [OPTIONS]"
+  echo ""
+  echo "  The first argument to the script must be a path to a Tika distribution archive, such as tika-server-2.0.0-SNAPSHOT-bin.tgz"
+  echo "    (only .tgz or .zip are supported formats for the archive)"
+  echo ""
+  echo "  Supported OPTIONS include:"
+  echo ""
+  echo "    -d     Directory for live / writable Tika files, such as logs, pid files; defaults to /var/tika"
+  echo ""
+  echo "    -i     Directory to extract the Tika installation archive; defaults to /opt/"
+  echo "             The specified path must exist prior to using this script."
+  echo ""
+  echo "    -p     Port Tika should bind to; default is 9998"
+  echo ""
+  echo "    -s     Service name; defaults to tika"
+  echo ""
+  echo "    -u     User to own the Tika files and run the Tika process as; defaults to tika"
+  echo "             This script will create the specified user account if it does not exist."
+  echo ""
+  echo "    -f     Upgrade Tika. Overwrite symlink and init script of previous installation."
+  echo ""
+  echo "    -n     Do not start Tika service after install, and do not abort on missing Java"
+  echo ""
+  echo " NOTE: Must be run as the root user"
+  echo ""
+} # end print_usage
+
+print_error() {  # Report the fatal error message $1 on stderr and abort the installer with status 1.
+  echo "$1" 1>&2
+  exit 1
+}
+
+# Locate *NIX distribution by looking for match from various detection strategies
+# We start with /etc/os-release, as this will also work for Docker containers
+for command in "grep -E \"^NAME=\" /etc/os-release" \
+               "lsb_release -i" \
+               "cat /proc/version" \
+               "uname -a" ; do
+    distro_string=$(eval $command 2>/dev/null)
+    unset distro
+    if [[ ${distro_string,,} == *"debian"* ]]; then
+      distro=Debian
+    elif [[ ${distro_string,,} == *"red hat"* ]]; then
+      distro=RedHat
+    elif [[ ${distro_string,,} == *"centos"* ]]; then
+      distro=CentOS
+    elif [[ ${distro_string,,} == *"ubuntu"* ]]; then
+      distro=Ubuntu
+    elif [[ ${distro_string,,} == *"suse"* ]]; then
+      distro=SUSE
+    elif [[ ${distro_string,,} == *"darwin"* ]]; then
+      echo "Sorry, this script does not support macOS. You'll need to setup Tika as a service manually using the documentation provided in the Tika Reference Guide."
+      echo "You could also try installing via Homebrew (http://brew.sh/), e.g. brew install tika"
+      exit 1
+    fi
+    if [[ $distro ]] ; then break ; fi
+done
+if [[ ! $distro ]] ; then
+  echo -e "\nERROR: Unable to auto-detect your *NIX distribution!\nYou'll need to setup Tika as a service manually using the documentation provided in the Tika Reference Guide.\n" 1>&2
+  exit 1
+fi
+
+if [ -z "$1" ]; then
+  print_usage "Must specify the path to the Tika installation archive, such as tika-server-2.0.0-SNAPSHOT-bin.tgz"
+  exit 1
+fi
+
+TIKA_ARCHIVE=$1
+if [ ! -f "$TIKA_ARCHIVE" ]; then
+  print_usage "Specified Tika installation archive $TIKA_ARCHIVE not found!"
+  exit 1
+fi
+
+# strip off path info, then derive the extracted directory name and archive type from the suffix
+TIKA_INSTALL_FILE=${TIKA_ARCHIVE##*/}
+is_tar=true
+if [[ "$TIKA_INSTALL_FILE" == *.tgz ]]; then
+  TIKA_DIR=${TIKA_INSTALL_FILE%.tgz}
+elif [[ "$TIKA_INSTALL_FILE" == *.zip ]]; then
+  TIKA_DIR=${TIKA_INSTALL_FILE%.zip}
+  is_tar=false
+else
+  print_usage "Tika installation archive $TIKA_ARCHIVE is invalid, expected a .tgz or .zip file!"
+  exit 1
+fi
+
+TIKA_START=true
+if [ $# -gt 1 ]; then
+  shift
+  while true; do
+    case $1 in
+        -i)
+            if [[ -z "$2" || "${2:0:1}" == "-" ]]; then
+              print_usage "Directory path is required when using the $1 option!"
+              exit 1
+            fi
+            TIKA_EXTRACT_DIR=$2
+            shift 2
+        ;;
+        -d)
+            if [[ -z "$2" || "${2:0:1}" == "-" ]]; then
+              print_usage "Directory path is required when using the $1 option!"
+              exit 1
+            fi
+            TIKA_VAR_DIR="$2"
+            shift 2
+        ;;
+        -u)
+            if [[ -z "$2" || "${2:0:1}" == "-" ]]; then
+              print_usage "Username is required when using the $1 option!"
+              exit 1
+            fi
+            TIKA_USER="$2"
+            shift 2
+        ;;
+        -s)
+            if [[ -z "$2" || "${2:0:1}" == "-" ]]; then
+              print_usage "Service name is required when using the $1 option!"
+              exit 1
+            fi
+            TIKA_SERVICE="$2"
+            shift 2
+        ;;
+        -p)
+            if [[ -z "$2" || "${2:0:1}" == "-" ]]; then
+              print_usage "Port is required when using the $1 option!"
+              exit 1
+            fi
+            TIKA_PORT="$2"
+            shift 2
+        ;;
+        -f)
+            TIKA_UPGRADE="YES"
+            shift 1
+        ;;
+        -n)
+            TIKA_START=false
+            shift 1
+        ;;
+        -help|-usage)
+            print_usage ""
+            exit 0
+        ;;
+        --)
+            shift
+            break
+        ;;
+        *)
+            if [ "$1" != "" ]; then
+              print_usage "Unrecognized or misplaced argument: $1!"
+              exit 1
+            else
+              break # out-of-args, stop looping
+            fi
+        ;;
+    esac
+  done
+fi
+
+# Test for availability of needed tools; is_tar holds the string "true" or "false", so compare it explicitly
+if [[ $is_tar == "true" ]] ; then
+  tar --version &>/dev/null     || print_error "Script requires the 'tar' command"
+else
+  unzip -hh &>/dev/null         || print_error "Script requires the 'unzip' command"
+fi
+if [[ $TIKA_START == "true" ]] ; then
+  service --version &>/dev/null || service --help &>/dev/null || print_error "Script requires the 'service' command"
+  java -version &>/dev/null     || print_error "Tika requires java, please install or set JAVA_HOME properly"
+fi
+lsof -h &>/dev/null             || echo "We recommend installing the 'lsof' command for more stable start/stop of Tika"
+
+
+if [ -z "$TIKA_EXTRACT_DIR" ]; then
+  TIKA_EXTRACT_DIR=/opt
+fi
+
+if [ ! -d "$TIKA_EXTRACT_DIR" ]; then
+  print_usage "Installation directory $TIKA_EXTRACT_DIR not found! Please create it before running this script."
+  exit 1
+fi
+
+if [ -z "$TIKA_SERVICE" ]; then
+  TIKA_SERVICE=tika
+fi
+
+if [ -z "$TIKA_VAR_DIR" ]; then
+  TIKA_VAR_DIR="/var/$TIKA_SERVICE"
+fi
+
+if [ -z "$TIKA_USER" ]; then
+  TIKA_USER=tika
+fi
+
+if [ -z "$TIKA_PORT" ]; then
+  TIKA_PORT=9998
+fi
+
+if [ -z "$TIKA_SPAWN_CHILD_OPTS" ]; then
+  TIKA_SPAWN_CHILD_OPTS="-spawnChild"
+fi
+
+if [ -z "$TIKA_UPGRADE" ]; then
+  TIKA_UPGRADE=NO
+fi
+
+if [ ! "$TIKA_UPGRADE" = "YES" ]; then
+  if [ -f "/etc/init.d/$TIKA_SERVICE" ]; then
+    print_usage "/etc/init.d/$TIKA_SERVICE already exists! Perhaps Tika is already setup as a service on this host? To upgrade Tika use the -f option."
+    exit 1
+  fi
+
+  if [ -e "$TIKA_EXTRACT_DIR/$TIKA_SERVICE" ]; then
+    print_usage "$TIKA_EXTRACT_DIR/$TIKA_SERVICE already exists! Please move this directory / link or choose a different service name using the -s option."
+    exit 1
+  fi
+fi
+
+# stop running instance
+if [ -f "/etc/init.d/$TIKA_SERVICE" ]; then
+  echo -e "\nStopping Tika instance if exists ...\n"
+  service "$TIKA_SERVICE" stop
+fi
+
+# create user if not exists
+tika_uid="`id -u "$TIKA_USER"`"
+if [ $? -ne 0 ]; then
+  echo "Creating new user: $TIKA_USER"
+  if [ "$distro" == "RedHat" ] || [ "$distro" == "CentOS" ] ; then
+    adduser --system -U -m --home-dir "$TIKA_VAR_DIR" "$TIKA_USER"
+  elif [ "$distro" == "SUSE" ]; then
+    useradd --system -U -m --home-dir "$TIKA_VAR_DIR" "$TIKA_USER"
+  else
+    adduser --system --shell /bin/bash --group --disabled-password --home "$TIKA_VAR_DIR" "$TIKA_USER"
+  fi
+fi
+
+# extract
+echo "Tika extract dir: $TIKA_EXTRACT_DIR"
+echo "TIKA_DIR: $TIKA_DIR"
+
+TIKA_INSTALL_DIR="$TIKA_EXTRACT_DIR/$TIKA_DIR"
+
+echo "tika install dir: $TIKA_INSTALL_DIR "
+if [ ! -d "$TIKA_INSTALL_DIR" ]; then
+
+  echo -e "\nExtracting $TIKA_ARCHIVE to $TIKA_EXTRACT_DIR\n"
+
+  if $is_tar ; then
+    tar zxf "$TIKA_ARCHIVE" -C "$TIKA_EXTRACT_DIR"
+  else
+    unzip -q "$TIKA_ARCHIVE" -d "$TIKA_EXTRACT_DIR"
+  fi
+
+  if [ ! -d "$TIKA_INSTALL_DIR" ]; then
+    echo -e "\nERROR: Expected directory $TIKA_INSTALL_DIR not found after extracting $TIKA_ARCHIVE ... script fails.\n" 1>&2
+    exit 1
+  fi
+
+  chown -R root: "$TIKA_INSTALL_DIR"
+  find "$TIKA_INSTALL_DIR" -type d -print0 | xargs -0 chmod 0755
+  find "$TIKA_INSTALL_DIR" -type f -print0 | xargs -0 chmod 0644
+  chmod -R 0755 "$TIKA_INSTALL_DIR/bin"
+else
+  echo -e "\nWARNING: $TIKA_INSTALL_DIR already exists! Skipping extract ...\n"
+fi
+
+# create a symlink for easier scripting
+if [ -h "$TIKA_EXTRACT_DIR/$TIKA_SERVICE" ]; then
+  echo -e "\nRemoving old symlink $TIKA_EXTRACT_DIR/$TIKA_SERVICE ...\n"
+  rm "$TIKA_EXTRACT_DIR/$TIKA_SERVICE"
+fi
+if [ -e "$TIKA_EXTRACT_DIR/$TIKA_SERVICE" ]; then
+  echo -e "\nWARNING: $TIKA_EXTRACT_DIR/$TIKA_SERVICE is not symlink! Skipping symlink update ...\n"
+else
+  echo -e "\nInstalling symlink $TIKA_EXTRACT_DIR/$TIKA_SERVICE -> $TIKA_INSTALL_DIR ...\n"
+  ln -s "$TIKA_INSTALL_DIR" "$TIKA_EXTRACT_DIR/$TIKA_SERVICE"
+fi
+
+# install init.d script
+echo -e "\nInstalling /etc/init.d/$TIKA_SERVICE script ...\n"
+cp "$TIKA_INSTALL_DIR/bin/init.d/tika" "/etc/init.d/$TIKA_SERVICE"
+chmod 0744 "/etc/init.d/$TIKA_SERVICE"
+chown root: "/etc/init.d/$TIKA_SERVICE"
+# do some basic variable substitution on the init.d script
+sed_expr1="s#TIKA_INSTALL_DIR=.*#TIKA_INSTALL_DIR=\"$TIKA_EXTRACT_DIR/$TIKA_SERVICE\"#"
+sed_expr2="s#TIKA_ENV=.*#TIKA_ENV=\"/etc/default/$TIKA_SERVICE.in.sh\"#"
+sed_expr3="s#RUNAS=.*#RUNAS=\"$TIKA_USER\"#"
+sed_expr4="s#Provides:.*#Provides: $TIKA_SERVICE#"
+sed -i -e "$sed_expr1" -e "$sed_expr2" -e "$sed_expr3" -e "$sed_expr4" "/etc/init.d/$TIKA_SERVICE"
+
+# install/move configuration
+if [ ! -d /etc/default ]; then
+  mkdir /etc/default
+  chown root: /etc/default
+  chmod 0755 /etc/default
+fi
+if [ -f "$TIKA_VAR_DIR/tika.in.sh" ]; then
+  echo -e "\nMoving existing $TIKA_VAR_DIR/tika.in.sh to /etc/default/$TIKA_SERVICE.in.sh ...\n"
+  mv "$TIKA_VAR_DIR/tika.in.sh" "/etc/default/$TIKA_SERVICE.in.sh"
+elif [ -f "/etc/default/$TIKA_SERVICE.in.sh" ]; then
+  echo -e "\n/etc/default/$TIKA_SERVICE.in.sh already exist. Skipping install ...\n"
+else
+  echo -e "\nInstalling /etc/default/$TIKA_SERVICE.in.sh ...\n"
+  cp "$TIKA_INSTALL_DIR/bin/tika.in.sh" "/etc/default/$TIKA_SERVICE.in.sh"
+  mv "$TIKA_INSTALL_DIR/bin/tika.in.sh" "$TIKA_INSTALL_DIR/bin/tika.in.sh.orig"
+  echo "TIKA_PID_DIR=\"$TIKA_VAR_DIR\"
+LOG4J_PROPS=\"$TIKA_VAR_DIR/log4j.properties\"
+TIKA_LOGS_DIR=\"$TIKA_VAR_DIR/logs\"
+TIKA_PORT=\"$TIKA_PORT\"
+TIKA_SPAWN_CHILD_OPTS=\"$TIKA_SPAWN_CHILD_OPTS\"
+" >> "/etc/default/$TIKA_SERVICE.in.sh"
+fi
+chown root:${TIKA_USER} "/etc/default/$TIKA_SERVICE.in.sh"
+chmod 0640 "/etc/default/$TIKA_SERVICE.in.sh"
+
+# install data directories and files
+mkdir -p "$TIKA_VAR_DIR/logs"
+chown -R "$TIKA_USER:" "$TIKA_VAR_DIR"
+find "$TIKA_VAR_DIR" -type d -print0 | xargs -0 chmod 0750
+find "$TIKA_VAR_DIR" -type f -print0 | xargs -0 chmod 0640  # currently no files exist in /var/tika
+
+# configure autostart of service
+if [[ "$distro" == "RedHat" || "$distro" == "CentOS" || "$distro" == "SUSE" ]]; then
+  chkconfig "$TIKA_SERVICE" on
+else
+  update-rc.d "$TIKA_SERVICE" defaults
+fi
+echo "Service $TIKA_SERVICE installed."
+echo "Customize Tika startup configuration in /etc/default/$TIKA_SERVICE.in.sh"
+
+# start service
+if [[ $TIKA_START == "true" ]] ; then
+  service "$TIKA_SERVICE" start
+  sleep 5
+  service "$TIKA_SERVICE" status
+else
+  echo "Not starting Tika service (option -n given). Start manually with 'service $TIKA_SERVICE start'"
+fi
diff --git a/tika-server/bin/tika b/tika-server/bin/tika
new file mode 100755
index 0000000..e8c6e02
--- /dev/null
+++ b/tika-server/bin/tika
@@ -0,0 +1,571 @@
+#!/usr/bin/env bash
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+TIKA_SCRIPT="$0"
+verbose=false
+THIS_OS=`uname -s`
+
+# What version of Java is required to run this version of Tika.
+JAVA_VER_REQ="8"
+
+stop_all=false
+
+# for now, we don't support running this script from cygwin due to problems
+# like not having lsof, ps auxww, curl, and awkward directory handling
+if [ "${THIS_OS:0:6}" == "CYGWIN" ]; then
+  echo -e "This script does not support cygwin due to severe limitations and lack of adherence\nto BASH standards, such as lack of lsof, curl, and ps options."
+  exit 1
+fi
+
+# Resolve symlinks to this script
+while [ -h "$TIKA_SCRIPT" ] ; do
+  ls=`ls -ld "$TIKA_SCRIPT"`
+  # Drop everything prior to ->
+  link=`expr "$ls" : '.*-> \(.*\)$'`
+  if expr "$link" : '/.*' > /dev/null; then
+    TIKA_SCRIPT="$link"
+  else
+    TIKA_SCRIPT=`dirname "$TIKA_SCRIPT"`/"$link"
+  fi
+done
+
+TIKA_TIP=`dirname "$TIKA_SCRIPT"`/..
+TIKA_TIP=`cd "$TIKA_TIP"; pwd`
+DEFAULT_SERVER_DIR="$TIKA_TIP/"
+
+# If an include wasn't specified in the environment, then search for one...
+if [ -z "$TIKA_INCLUDE" ]; then
+  # Locations (in order) to use when searching for an include file.
+  for include in "`dirname "$0"`/tika.in.sh" \
+               "$HOME/.tika.in.sh" \
+               /usr/share/tika/tika.in.sh \
+               /usr/local/share/tika/tika.in.sh \
+               /etc/default/tika.in.sh \
+               /opt/tika/tika.in.sh; do
+    if [ -r "$include" ]; then
+        TIKA_INCLUDE="$include"
+        . "$include"
+        break
+    fi
+  done
+elif [ -r "$TIKA_INCLUDE" ]; then
+  . "$TIKA_INCLUDE"
+fi
+
+if [ -z "$TIKA_PID_DIR" ]; then
+  TIKA_PID_DIR="$TIKA_TIP/bin"
+fi
+
+
+
+
+
+echo "Default server $DEFAULT_SERVER_DIR"
+
+if [ -n "$TIKA_JAVA_HOME" ]; then
+  JAVA="$TIKA_JAVA_HOME/bin/java"
+elif [ -n "$JAVA_HOME" ]; then
+  for java in "$JAVA_HOME"/bin/amd64/java "$JAVA_HOME"/bin/java; do
+    if [ -x "$java" ]; then
+      JAVA="$java"
+      break
+    fi
+  done
+  if [ -z "$JAVA" ]; then
+    echo >&2 "The currently defined JAVA_HOME ($JAVA_HOME) refers"
+    echo >&2 "to a location where Java could not be found.  Aborting."
+    echo >&2 "Either fix the JAVA_HOME variable or remove it from the"
+    echo >&2 "environment so that the system PATH will be searched."
+    exit 1
+  fi
+else
+  JAVA=java
+fi
+
+if [ -z "$TIKA_STOP_WAIT" ]; then
+  TIKA_STOP_WAIT=180
+fi
+# test that Java exists, is executable and correct version
+JAVA_VER=$("$JAVA" -version 2>&1)
+if [[ $? -ne 0 ]] ; then
+  echo >&2 "Java not found, or an error was encountered when running java."
+  echo >&2 "A working Java $JAVA_VER_REQ JRE is required to run Tika!"
+  echo >&2 "Please install latest version of Java $JAVA_VER_REQ or set JAVA_HOME properly."
+  echo >&2 "Command that we tried: '${JAVA} -version', with response:"
+  echo >&2 "${JAVA_VER}"
+  echo >&2
+  echo >&2 "Debug information:"
+  echo >&2 "JAVA_HOME: ${JAVA_HOME:-N/A}"
+  echo >&2 "Active Path:"
+  echo >&2 "${PATH}"
+  exit 1
+else
+  JAVA_VER_NUM=$(echo $JAVA_VER | head -1 | awk -F '"' '/version/ {print $2}' | sed -e's/^1\.//' | sed -e's/[._-].*$//')
+  if [[ "$JAVA_VER_NUM" -lt "$JAVA_VER_REQ" ]] ; then
+    echo >&2 "Your current version of Java is too old to run this version of Tika."
+    echo >&2 "We found major version $JAVA_VER_NUM, using command '${JAVA} -version', with response:"
+    echo >&2 "${JAVA_VER}"
+    echo >&2
+    echo >&2 "Please install latest version of Java $JAVA_VER_REQ or set JAVA_HOME properly."
+    echo >&2
+    echo >&2 "Debug information:"
+    echo >&2 "JAVA_HOME: ${JAVA_HOME:-N/A}"
+    echo >&2 "Active Path:"
+    echo >&2 "${PATH}"
+    exit 1
+  fi
+  JAVA_VENDOR="Oracle"
+  if [ "`echo $JAVA_VER | grep -i "IBM J9"`" != "" ]; then
+      JAVA_VENDOR="IBM J9"
+  fi
+fi
+
+
+function print_usage() {
+  CMD="$1"
+  ERROR_MSG="$2"
+
+  if [ "$ERROR_MSG" != "" ]; then
+    echo -e "\nERROR: $ERROR_MSG\n"
+  fi
+
+  if [ -z "$CMD" ]; then
+    echo ""
+    echo "Usage: tika COMMAND OPTIONS"
+    echo "       where COMMAND is one of: start, stop, status"
+    echo ""
+    echo "  Standalone server example (start Tika running in the background on port 9998):"
+    echo ""
+    echo "    ./tika start -p 9998"
+    echo ""
+    echo "Pass -help after any COMMAND to see command-specific usage information,"
+    echo "  such as:    ./tika start -help or ./tika stop -help"
+    echo ""
+  elif [ "$CMD" == "start" ]; then
+    echo ""
+    echo "Usage: tika $CMD [-f] [-h hostname] [-p port] [-d directory] [-V]"
+    echo ""
+    echo "  -f            Start Tika in foreground; default starts Tika in the background"
+    echo "                  and sends stdout / stderr to tika-PORT-console.log"
+    echo ""
+    echo "  -p <port>     Specify the port to start the Tika HTTP listener on; default is 9998"
+    echo ""
+    echo "  -d            Specify the Tika server directory; defaults to ../"
+    echo ""
+    echo "  -j/--jar      Specify the tika-server.jar; defaults to tika-server.jar"
+    echo ""
+    echo "  -V/--verbose   Verbose messages from this script"
+    echo ""
+
+  elif [ "$CMD" == "stop" ]; then
+    echo ""
+    echo "Usage: tika stop [-p port] [-V]"
+    echo ""
+    echo "  -p <port>     Specify the port the Tika HTTP listener is bound to"
+    echo ""
+    echo "  --all          Find and stop all running Tika servers on this host"
+    echo ""
+    echo "  -V/--verbose   Verbose messages from this script"
+    echo ""
+    echo "  NOTE: To see if any Tika servers are running, do: tika status"
+    echo ""
+  elif [ "$CMD" == "status" ]; then
+    echo ""
+    echo "Usage: tika status"
+    echo ""
+    echo "  This command will show the status of all running Tika servers."
+    echo "  It can only detect those Tika servers running on the current host."
+    echo ""
+  fi
+} # end print_usage
+
+# used to show the script is still alive when waiting on work to complete; $1 is the PID to watch
+function spinner() {
+  local pid=$1
+  local delay=0.5
+  local spinstr='|/-\'
+  while [ "$(ps aux | awk '{print $2}' | grep -w $pid)" ]; do  # loop while the PID column of ps still lists $pid
+      local temp=${spinstr#?}  # spinstr without its first character
+      printf " [%c]  " "$spinstr"  # %c prints only the first character of spinstr
+      local spinstr=$temp${spinstr%"$temp"}  # rotate: move the first character to the end
+      sleep $delay
+      printf "\b\b\b\b\b\b"  # back up over the six characters just printed
+  done
+  printf "    \b\b\b\b"
+}
+
+# uses kill -9 to stop Tika process; $1 = server directory (stored in DIR, not used below), $2 = PID to kill
+function stop_tika() {
+
+  DIR="$1"
+  TIKA_PID="$2"
+
+  if [ -z "$TIKA_PID" ]; then
+    echo "ERROR: No PID found for Tika running on port $TIKA_PORT ... script fails."
+    exit 1
+  fi
+  echo "Sending kill command to Tika running on port $TIKA_PORT with process $TIKA_PID"
+  kill -9 $TIKA_PID
+  rm -f "$TIKA_PID_DIR/tika-$TIKA_PORT.pid"  # pid file name is built from the TIKA_PORT variable, not from $2
+  sleep 1
+
+  CHECK_PID=`ps auxww | awk '{print $2}' | grep -w $TIKA_PID | sort -r | tr -d ' '`  # non-empty if ps still lists the PID
+  if [ "$CHECK_PID" != "" ]; then
+    echo "ERROR: Failed to kill previous Tika Java process $TIKA_PID ... script fails."
+    exit 1
+  fi
+} # end stop_tika
+
+
+if [ $# -eq 1 ]; then
+  case $1 in
+    -help|-usage|-h|--help)
+        print_usage ""
+        exit
+    ;;
+    -info|-i|status)
+        #get_info
+        echo "To be done"
+        exit $?
+    ;;
+  esac
+fi
+
+if [ $# -gt 0 ]; then
+  # if first arg starts with a dash (and it's not -help or -info),
+  # then assume they are starting Tika, such as: tika -f
+  if [[ $1 == -* ]]; then
+    SCRIPT_CMD="start"
+  else
+    SCRIPT_CMD="$1"
+    shift
+  fi
+else
+  # no args - just show usage and exit
+  print_usage ""
+  exit
+fi
+
+# verify the command given is supported
+if [ "$SCRIPT_CMD" != "stop" ] && [ "$SCRIPT_CMD" != "start" ]; then
+  print_usage "" "$SCRIPT_CMD is not a valid command!"
+  exit 1
+fi
+
+
+# Run in foreground (default is to run in the background)
+FG="false"
+
+if [ $# -gt 0 ]; then  # parse the options that follow the command; each arm shifts what it consumed
+  while true; do
+    case "$1" in
+      -d|-dir)
+          if [[ -z "$2" || "${2:0:1}" == "-" ]]; then
+            print_usage "$SCRIPT_CMD" "Server directory is required when using the $1 option!"
+            exit 1
+          fi
+
+          if [[ "$2" == "." || "$2" == "./" || "$2" == ".." || "$2" == "../" ]]; then
+            TIKA_SERVER_DIR="$(pwd)/$2"
+          else
+            # see if the arg value is relative to the tip vs full path
+            if [[ "$2" != /* ]] && [[ -d "$TIKA_TIP/$2" ]]; then
+              TIKA_SERVER_DIR="$TIKA_TIP/$2"
+            else
+              TIKA_SERVER_DIR="$2"
+            fi
+          fi
+          # resolve it to an absolute path
+          TIKA_SERVER_DIR="$(cd "$TIKA_SERVER_DIR"; pwd)"
+          shift 2
+      ;;
+        -f|-foreground)
+            FG="true"
+            shift
+        ;;
+        -p|-port)
+            if [[ -z "$2" || "${2:0:1}" == "-" ]]; then
+              print_usage "$SCRIPT_CMD" "Port number is required when using the $1 option!"
+              exit 1
+            fi
+            TIKA_PORT="$2"
+            shift 2
+        ;;
+        -h|-host)
+            if [[ -z "$2" || "${2:0:1}" == "-" ]]; then
+              print_usage "$SCRIPT_CMD" "Hostname is required when using the $1 option!"
+              exit 1
+            fi
+            TIKA_HOST="$2"
+            shift 2
+        ;;
+        -j|--jar)  # value is mandatory: with a trailing -j, "shift 2" fails without shifting and the loop never ends
+            [[ -n "$2" && "${2:0:1}" != "-" ]] || { print_usage "$SCRIPT_CMD" "Jar file is required when using the $1 option!"; exit 1; }
+            TIKA_SERVER_JAR="$2"
+            shift 2
+        ;;
+        -help|-usage)
+            print_usage "$SCRIPT_CMD"
+            exit 0
+        ;;
+        -V|--verbose)
+            verbose=true
+            shift
+        ;;
+        --all)
+            stop_all=true
+            shift
+        ;;
+        --)
+            shift
+            break
+        ;;
+        *)
+            if [ "${1:0:2}" == "-D" ]; then
+              # pass thru any opts that begin with -D (java system props)
+              TIKA_OPTS+=("$1")
+              PASS_TO_RUN_EXAMPLE+=" $1"
+              shift
+            else
+              if [ "$1" != "" ]; then
+                print_usage "$SCRIPT_CMD" "$1 is not supported by this script"
+                exit 1
+              else
+                break # out-of-args, stop looping
+              fi
+            fi
+        ;;
+    esac
+  done
+fi
+
+if [ -z "$TIKA_SPAWN_CHILD_OPTS" ]; then
+  TIKA_SPAWN_CHILD_OPTS="-spawnChild"
+fi
+
+if [ -z "$TIKA_SERVER_DIR" ]; then
+  TIKA_SERVER_DIR="$DEFAULT_SERVER_DIR"
+fi
+
+if [ ! -e "$TIKA_SERVER_DIR" ]; then
+  echo -e "\nTika server directory $TIKA_SERVER_DIR not found!\n"
+  exit 1
+fi
+
+############# start/stop logic below here ################
+
+if $verbose ; then
+  echo "Using Tika root directory: $TIKA_TIP"
+  echo "Using Java: $JAVA"
+  "$JAVA" -version
+fi
+
+# stop: with --all stop every server that has a pid file; otherwise use -p or the only pid file found
+if [[ "$SCRIPT_CMD" == "stop" ]]; then
+  if $stop_all; then
+    none_stopped=true
+    # read the pid files via process substitution rather than a pipe, so that the loop runs in
+    # this shell and the none_stopped flag it clears is still visible after the loop
+    while IFS= read -r PIDF
+      do
+        NEXT_PID=$(cat "$PIDF")
+        echo "About to stop Tika with PID $NEXT_PID"
+        stop_tika "$TIKA_SERVER_DIR" "$NEXT_PID"
+        none_stopped=false
+        rm -f "$PIDF"
+    done < <(find "$TIKA_PID_DIR" -name "tika-*.pid" -type f)
+    if $none_stopped; then
+      echo -e "\nNo Tika servers found to stop.\n"
+    fi
+  else
+    # not stopping all: without a port, stop the server only if exactly one pid file exists; with a port, use its pid file
+    none_stopped=true
+    numTikas=`find "$TIKA_PID_DIR" -name "tika-*.pid" -type f | wc -l | tr -d ' '`
+    if [ -z "$TIKA_PORT" ]; then
+      if [ $numTikas -eq 1 ]; then
+        # only do this if there is only 1 server running, otherwise they must provide the -p or --all
+        PID="$(cat "$(find "$TIKA_PID_DIR" -name "tika-*.pid" -type f)")"
+        CHECK_PID=`ps auxww | awk '{print $2}' | grep -w $PID | sort -r | tr -d ' '`
+        if [ "$CHECK_PID" != "" ]; then
+          stop_tika "$TIKA_SERVER_DIR" "$CHECK_PID"
+          none_stopped=false
+        fi
+      fi
+    else
+      PID="$(cat "$(find "$TIKA_PID_DIR" -name "tika-$TIKA_PORT.pid" -type f)")"
+      stop_tika "$TIKA_SERVER_DIR" "$PID"
+      none_stopped=false
+    fi
+
+    if $none_stopped; then
+      if [ $numTikas -gt 0 ]; then
+        echo -e "\nFound $numTikas Tika servers running! Must either specify a port using -p or --all to stop all Tika servers on this host.\n"
+      else
+        echo -e "\nNo Tika servers found to stop.\n"
+      fi
+      exit 1
+    fi
+  fi
+  exit
+fi
+
+
+# Apply the built-in defaults for settings that are still empty.
+[[ -n "$TIKA_PORT" ]] || TIKA_PORT=9998
+[[ -n "$TIKA_HOST" ]] || TIKA_HOST='0.0.0.0'
+[[ -n "$TIKA_SERVER_JAR" ]] || TIKA_SERVER_JAR=tika-server.jar
+
+# Logs go under the server directory unless another location was configured.
+[[ -n "$TIKA_LOGS_DIR" ]] || TIKA_LOGS_DIR="$TIKA_SERVER_DIR/logs"
+
+# Launches Tika in the foreground or in the background.
+#
+# Arguments:
+#   $1 - "true" to exec the JVM in the foreground (replacing this script);
+#        any other value starts it in the background under nohup
+#   $2 - additional arguments; deliberately word-split onto the java command
+#        line ahead of -jar
+# Globals read: JAVA, verbose, TIKA_SERVER_DIR, TIKA_SERVER_JAR, TIKA_HOST,
+#   TIKA_PORT, TIKA_LOGS_DIR, TIKA_PID_DIR, TIKA_STOP_WAIT,
+#   TIKA_SPAWN_CHILD_OPTS, JAVA_MEM_OPTS, GC_TUNE, GC_LOG_OPTS, TIKA_HOST_ARG,
+#   LOG4J_CONFIG, TIKA_OPTS, TIKA_DATA_HOME
+# Globals written: run_in_foreground, TIKA_ADDL_ARGS, GC_TUNE, TIKA_START_OPTS,
+#   TIKA_PID
+# Side effects (background mode): console output is redirected to
+#   $TIKA_LOGS_DIR/tika-$TIKA_PORT-console.log and the pid of the launched
+#   process is written to $TIKA_PID_DIR/tika-$TIKA_PORT.pid
+# Returns: in background mode with lsof available, 0 once the port is seen
+#   listening and 1 if it is not within TIKA_STOP_WAIT seconds. Exits the
+#   script with 1 when the server directory, jar or logs directory is unusable.
+function start_tika() {
+
+  run_in_foreground="$1"
+  TIKA_ADDL_ARGS="$2"
+
+  # define default GC_TUNE
+  if [ -z "${GC_TUNE+x}" ]; then
+    GC_TUNE=('-XX:+UseG1GC' \
+      '-XX:+PerfDisableSharedMem' \
+      '-XX:+ParallelRefProcEnabled' \
+      '-XX:MaxGCPauseMillis=250' \
+      '-XX:+UseLargePages' \
+      '-XX:+AlwaysPreTouch')
+  else
+    # word-split a pre-set GC_TUNE value into an array of JVM flags
+    GC_TUNE=($GC_TUNE)
+  fi
+
+  if $verbose ; then
+    echo -e "\nStarting Tika using the following settings:"
+    echo -e "    JAVA            = $JAVA"
+    echo -e "    TIKA_SERVER_DIR = $TIKA_SERVER_DIR"
+    echo -e "    TIKA_SERVER_JAR = $TIKA_SERVER_JAR"
+    echo -e "    TIKA_HOST       = $TIKA_HOST"
+    echo -e "    TIKA_PORT       = $TIKA_PORT"
+    echo -e "    JAVA_MEM_OPTS   = ${JAVA_MEM_OPTS[@]}"
+    echo -e "    GC_TUNE         = ${GC_TUNE[@]}"
+    echo -e "    GC_LOG_OPTS     = ${GC_LOG_OPTS[@]}"
+    echo -e "    TIKA_SPAWN_CHILD_OPTS = $TIKA_SPAWN_CHILD_OPTS"
+
+    if [ "$TIKA_OPTS" != "" ]; then
+      echo -e "    TIKA_OPTS       = ${TIKA_OPTS[@]}"
+    fi
+
+    if [ "$TIKA_ADDL_ARGS" != "" ]; then
+      echo -e "    TIKA_ADDL_ARGS  = $TIKA_ADDL_ARGS"
+    fi
+
+    if [ "$TIKA_DATA_HOME" != "" ]; then
+      echo -e "    TIKA_DATA_HOME  = $TIKA_DATA_HOME"
+    fi
+
+    echo -e "\n"
+  fi
+
+  # need to launch tika from the server dir
+  cd "$TIKA_SERVER_DIR" || {
+    echo -e "\nERROR: Cannot change to Tika server directory $TIKA_SERVER_DIR. Exiting"
+    exit 1
+  }
+  if [ ! -e "$TIKA_SERVER_DIR/$TIKA_SERVER_JAR" ]; then
+    echo -e "\nERROR: $TIKA_SERVER_JAR file not found in $TIKA_SERVER_DIR\nPlease check your -d and -j parameters to set the correct Tika server directory and jar.\n"
+    exit 1
+  fi
+
+  TIKA_START_OPTS=('-server' "${JAVA_MEM_OPTS[@]}" "${GC_TUNE[@]}" "${GC_LOG_OPTS[@]}" \
+    "${TIKA_HOST_ARG[@]}" \
+    "${LOG4J_CONFIG[@]}" "${TIKA_OPTS[@]}")
+
+  if ! mkdir -p "$TIKA_LOGS_DIR" 2>/dev/null; then
+    echo -e "\nERROR: Logs directory $TIKA_LOGS_DIR could not be created. Exiting"
+    exit 1
+  fi
+  if [ ! -w "$TIKA_LOGS_DIR" ]; then
+    echo -e "\nERROR: Logs directory $TIKA_LOGS_DIR is not writable. Exiting"
+    exit 1
+  fi
+  # Rejects a logs directory given as exactly one of these bare names.
+  # NOTE(review): the reserved list does not obviously match Tika's layout;
+  # confirm whether it is still needed.
+  case "$TIKA_LOGS_DIR" in
+    contexts|etc|lib|modules|resources|scripts|tika|tika-webapp)
+      echo -e "\nERROR: Logs directory $TIKA_LOGS_DIR is invalid. Reserved for the system. Exiting"
+      exit 1
+      ;;
+  esac
+
+
+  # TIKA_ADDL_ARGS and TIKA_SPAWN_CHILD_OPTS stay unquoted on purpose: each may
+  # hold several space-separated options. Jar, port and host are single values
+  # and are quoted so a jar path containing spaces survives.
+  if [ "$run_in_foreground" == "true" ]; then
+    if $verbose ; then
+      echo "Startup command"
+      echo "$JAVA ${TIKA_START_OPTS[@]} $TIKA_ADDL_ARGS -jar $TIKA_SERVER_JAR -p $TIKA_PORT -h $TIKA_HOST $TIKA_SPAWN_CHILD_OPTS"
+    fi
+    # exec replaces this shell, so nothing after this line runs in foreground mode
+    exec "$JAVA" "${TIKA_START_OPTS[@]}" $TIKA_ADDL_ARGS -jar "$TIKA_SERVER_JAR" -p "$TIKA_PORT" -h "$TIKA_HOST" $TIKA_SPAWN_CHILD_OPTS
+  else
+    # run Tika in the background
+    if $verbose ; then
+      echo "Startup command"
+      echo "$JAVA ${TIKA_START_OPTS[@]} $TIKA_ADDL_ARGS -jar $TIKA_SERVER_JAR -p $TIKA_PORT -h $TIKA_HOST $TIKA_SPAWN_CHILD_OPTS $TIKA_LOGS_DIR/tika-$TIKA_PORT-console.log $TIKA_PID_DIR/tika-$TIKA_PORT.pid"
+    fi
+    nohup "$JAVA" "${TIKA_START_OPTS[@]}" $TIKA_ADDL_ARGS \
+      -jar "$TIKA_SERVER_JAR" -p "$TIKA_PORT" -h "$TIKA_HOST" $TIKA_SPAWN_CHILD_OPTS \
+      1>"$TIKA_LOGS_DIR/tika-$TIKA_PORT-console.log" 2>&1 & echo "$!" > "$TIKA_PID_DIR/tika-$TIKA_PORT.pid"
+
+    # no lsof on cygwin though
+    if hash lsof 2>/dev/null ; then  # hash returns true if lsof is on the path
+      echo -n "Waiting up to $TIKA_STOP_WAIT seconds to see Tika running on port $TIKA_PORT"
+      # Launch in a subshell to show the spinner
+      (loops=0
+      while true
+      do
+        running="$(lsof -PniTCP:"$TIKA_PORT" -sTCP:LISTEN)"
+        if [ -z "$running" ]; then
+          # each pass sleeps 2 seconds, so loops * 2 is the time waited so far
+          slept=$((loops * 2))
+          if [ "$slept" -lt "$TIKA_STOP_WAIT" ]; then
+            sleep 2
+            loops=$((loops + 1))
+          else
+            echo -e "Still not seeing Tika listening on $TIKA_PORT after $TIKA_STOP_WAIT seconds!"
+            tail -n 30 "$TIKA_LOGS_DIR/tika-$TIKA_PORT-console.log"
+            exit 1 # subshell! non-zero marks the start as failed
+          fi
+        else
+          TIKA_PID="$(ps auxww | grep tika\-server | grep -w "\-p $TIKA_PORT" | grep -v "\-child" | grep -v grep | awk '{print $2}' | sort -r)"
+          echo -e "\nStarted Tika server on port $TIKA_PORT (pid=$TIKA_PID). Happy extracting!\n"
+          exit 0 # subshell!
+        fi
+      done) &
+      local waiter_pid=$!
+      spinner "$waiter_pid"
+      # Collect the waiting subshell's exit status so that a start timeout is
+      # reported to the caller.
+      # NOTE(review): assumes spinner (defined elsewhere in this script) does
+      # not itself call wait on this pid - confirm.
+      wait "$waiter_pid"
+      return $?
+    else
+      echo -e "NOTE: Please install lsof as this script needs it to determine if Tika is listening on port $TIKA_PORT."
+      sleep 10
+      TIKA_PID="$(ps auxww | grep tika\-server | grep -w "\-p $TIKA_PORT" | grep -v "\-child" | grep -v grep | awk '{print $2}' | sort -r)"
+      echo -e "\nStarted Tika server on port $TIKA_PORT (pid=$TIKA_PID). Happy extracting!\n"
+      return
+    fi
+  fi
+} # end start_tika
+
+
+
+if [[ "$SCRIPT_CMD" == "start" ]]; then
+  start_tika "$FG" "$ADDITIONAL_CMD_OPTS"
+  # Propagate start_tika's status instead of always reporting failure, so that
+  # callers checking the exit code see 0 after a successful start.
+  exit $?
+fi
diff --git a/tika-server/bin/tika.in.sh b/tika-server/bin/tika.in.sh
new file mode 100755
index 0000000..687ceae
--- /dev/null
+++ b/tika-server/bin/tika.in.sh
@@ -0,0 +1,79 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Settings here will override settings in existing env vars or in bin/tika.  The default shipped state
+# of this file is completely commented.
+
+# By default the script will use JAVA_HOME to determine which java
+# to use, but you can set a specific path for Tika to use without
+# affecting other Java applications on your server/workstation.
+#TIKA_JAVA_HOME=""
+
+# This controls the number of seconds that the Tika script will wait for
+# Tika to start.  If the start fails, the script will
+# give up waiting and display the last few lines of the logfile.
+#TIKA_STOP_WAIT="180"
+
+# Enable verbose GC logging...
+#  * If this is unset, various default options will be selected depending on which JVM version is in use
+#  * For Java 8: if this is set, additional params will be added to specify the log file & rotation
+#  * For Java 9 or higher: each included opt param that starts with '-Xlog:gc', but does not include an
+#    output specifier, will have a 'file' output specifier (as well as formatting & rollover options)
+#    appended, using the effective value of the TIKA_LOGS_DIR.
+#
+#GC_LOG_OPTS='-Xlog:gc*'  # (Java 9+)
+#GC_LOG_OPTS="-verbose:gc -XX:+PrintHeapAtGC -XX:+PrintGCDetails \
+#  -XX:+PrintGCDateStamps -XX:+PrintGCTimeStamps -XX:+PrintTenuringDistribution -XX:+PrintGCApplicationStoppedTime"
+
+# These GC settings have been shown to work well for a number of common Solr workloads.  Good for Tika?
+#GC_TUNE=" \
+#-XX:SurvivorRatio=4 \
+#-XX:TargetSurvivorRatio=90 \
+#-XX:MaxTenuringThreshold=8 \
+#-XX:+UseConcMarkSweepGC \
+#-XX:ConcGCThreads=4 -XX:ParallelGCThreads=4 \
+#-XX:+CMSScavengeBeforeRemark \
+#-XX:PretenureSizeThreshold=64m \
+#-XX:+UseCMSInitiatingOccupancyOnly \
+#-XX:CMSInitiatingOccupancyFraction=50 \
+#-XX:CMSMaxAbortablePrecleanTime=6000 \
+#-XX:+CMSParallelRemarkEnabled \
+#-XX:+ParallelRefProcEnabled \
+#-XX:-OmitStackTraceInFastThrow"  # etc.
+
+# Anything you add to the TIKA_OPTS variable will be included in the java
+# start command line as-is, in ADDITION to other options. If you specify the
+# -a option on start script, those options will be appended as well. Examples:
+#TIKA_OPTS="$TIKA_OPTS -Dlog4j.configuration=file:log4j_server.xml"
+
+# Location where the bin/tika script will save PID files for running instances
+# If not set, the script will create PID files in /var/tika
+#TIKA_PID_DIR=
+
+# Tika provides a default Log4J configuration properties file in tika-server.jar
+# however, you may want to customize the log settings and file appender location
+# so you can point the script to use a different log4j.properties file
+#LOG4J_PROPS=/var/tika/log4j.properties
+
+# Location where Tika should write logs to.
+#TIKA_LOGS_DIR=/var/tika/logs
+
+# Sets the port Tika binds to, default is 9998
+#TIKA_PORT=9998
+
+# Tika Server has a number of options to make it more robust to OOMs, Infinite
+# Loops, and Memory Leaks via the -spawnChild parameter. Learn more at
+# http://wiki.apache.org/tika/TikaJAXRS
+#TIKA_SPAWN_CHILD_OPTS="-spawnChild -maxFiles 100000 -JXmx4g"
diff --git a/tika-server/pom.xml b/tika-server/pom.xml
index 5781d21..75dd783 100644
--- a/tika-server/pom.xml
+++ b/tika-server/pom.xml
@@ -305,6 +305,24 @@
           </excludes>
         </configuration>
       </plugin>
+      <plugin>
+        <artifactId>maven-assembly-plugin</artifactId>
+        <version>3.2.0</version>
+        <configuration>
+          <descriptors>
+            <descriptor>assembly.xml</descriptor>
+          </descriptors>
+        </configuration>
+        <executions>
+          <execution>
+            <id>make-assembly</id> <!-- this is used for inheritance merges -->
+            <phase>package</phase> <!-- bind to the packaging phase -->
+            <goals>
+              <goal>single</goal>
+            </goals>
+          </execution>
+        </executions>        
+      </plugin>
     </plugins>
   </build>
   <profiles>