You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by dm...@apache.org on 2020/11/30 21:50:40 UTC
[tika-docker] 01/28: Initial version for testing
This is an automated email from the ASF dual-hosted git repository.
dmeikle pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/tika-docker.git
commit 18bb384b14f3232b58c0b3d7e00e32abb377cb63
Author: David Meikle <dm...@apache.org>
AuthorDate: Thu Jan 9 02:10:24 2020 +0000
Initial version for testing
---
.gitignore | 27 ++++++++++++++++++++++
.travis.ci.yml | 9 ++++++++
README.md | 3 +++
docker-tool.sh | 68 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
full/Dockerfile | 50 +++++++++++++++++++++++++++++++++++++++
minimal/Dockerfile | 45 ++++++++++++++++++++++++++++++++++++
6 files changed, 202 insertions(+)
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..dda6180
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,27 @@
+.svn
+target
+dependency-reduced-pom.xml
+.idea
+.classpath
+.project
+.settings
+*.iml
+*.ipr
+*.iws
+*.bin
+nbactions.xml
+nb-configuration.xml
+*.DS_Store
+*.tmp-inception
+*.snap
+.*.swp
+tika-deployment/tika-snap-app/parts/
+tika-deployment/tika-snap-app/prime/
+tika-deployment/tika-snap-app/snap/
+tika-deployment/tika-snap-app/stage/
+tika-deployment/tika-snap-app/test/
+tika-deployment/tika-snap-server/parts/
+tika-deployment/tika-snap-server/prime/
+tika-deployment/tika-snap-server/snap/
+tika-deployment/tika-snap-server/stage/
+
diff --git a/.travis.ci.yml b/.travis.ci.yml
new file mode 100644
index 0000000..913a0bc
--- /dev/null
+++ b/.travis.ci.yml
@@ -0,0 +1,9 @@
+language: bash  # NOTE(review): Travis CI reads .travis.yml; confirm a file named .travis.ci.yml is picked up
+services: docker
+env:
+  matrix:  # each entry below becomes its own job with VERSION exported
+    - VERSION=1.23
+    - VERSION=1.22
+script:
+  - ./docker-tool.sh build $VERSION  # explicit ./ because the checkout directory is not on PATH
+  - ./docker-tool.sh test $VERSION
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..773b484
--- /dev/null
+++ b/README.md
@@ -0,0 +1,3 @@
+# tika-docker
+
+Work in Progress
\ No newline at end of file
diff --git a/docker-tool.sh b/docker-tool.sh
new file mode 100755
index 0000000..39e02c3
--- /dev/null
+++ b/docker-tool.sh
@@ -0,0 +1,68 @@
+#!/usr/bin/env bash
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Consume leading flags: -h prints the usage text and exits 0, any other flag is an error.
+while getopts ":h" flag; do
+  if [ "$flag" = "h" ]; then
+    printf '%s\n' \
+      "Usage:" \
+      " docker-tool.sh -h Display this help message." \
+      " docker-tool.sh build <TIKA_VERSION> Builds image(s) for <TIKA_VERSION>." \
+      " docker-tool.sh test <TIKA_VERSION> Tests image(s) for <TIKA_VERSION>." \
+      " docker-tool.sh publish <TIKA_VERSION> Publishes image(s) for <TIKA_VERSION> to Docker Hub."
+    exit 0
+  elif [ "$flag" = "?" ]; then
+    # The leading ':' in the optstring makes getopts report an unknown flag as '?' with the flag in OPTARG.
+    echo "Invalid Option: -$OPTARG" >&2
+    exit 1
+  fi
+done
+
+# Smoke-test apache/tika:$1: start it, wait 10s, GET /version, always remove the container; returns non-zero if the container did not start or the request failed.
+test_docker_image() {
+  docker run -d --name "$1" -p 9998:9998 apache/tika:"$1" || return 1
+  sleep 10
+  curl --fail --silent --show-error http://localhost:9998/version; local status=$?
+  docker rm -f "$1"; return "$status"
+}
+
+shift $((OPTIND -1))
+subcommand=$1; shift
+version=$1; shift
+
+case "$subcommand" in
+ build)
+ # Build slim version with minimal dependencies
+ docker build -t apache/tika:${version} --build-arg TIKA_VERSION=${version} - < minimal/Dockerfile
+ # Build full version with OCR, Fonts and GDAL
+ docker build -t apache/tika:${version}-full --build-arg TIKA_VERSION=${version} - < full/Dockerfile
+ ;;
+
+ test)
+ # Test minimal image
+ test_docker_image ${version}
+ # Test full image
+ test_docker_image "${version}-full"
+ ;;
+
+ publish)
+ echo "Does nothing until we get Docker Hub access setup under Apache Organisation"
+ ;;
+
+esac
diff --git a/full/Dockerfile b/full/Dockerfile
new file mode 100644
index 0000000..a9ad964
--- /dev/null
+++ b/full/Dockerfile
@@ -0,0 +1,50 @@
+# Licensed under the Apache License, Version 2.0 (the "License"); you may not
+# use this file except in compliance with the License. You may obtain a copy of
+# the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations under
+# the License.
+FROM ubuntu:bionic AS base
+FROM base AS dependencies
+# Index refresh, install and cleanup share ONE layer: a separately cached "apt-get update" layer goes
+# stale, and files removed by a later RUN would still be stored in the earlier layer of the image.
+RUN apt-get update \
+    && echo ttf-mscorefonts-installer msttcorefonts/accepted-mscorefonts-eula select true | debconf-set-selections \
+    && DEBIAN_FRONTEND=noninteractive apt-get -y install openjdk-11-jre-headless gdal-bin tesseract-ocr \
+        tesseract-ocr-eng tesseract-ocr-ita tesseract-ocr-fra tesseract-ocr-spa tesseract-ocr-deu \
+        curl xfonts-utils fonts-freefont-ttf fonts-liberation ttf-mscorefonts-installer wget cabextract \
+    && apt-get clean -y && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
+
+FROM dependencies AS fetch_tika
+ARG TIKA_VERSION
+ENV NEAREST_TIKA_SERVER_URL="https://www.apache.org/dyn/closer.cgi/tika/tika-server-${TIKA_VERSION}.jar?filename=tika/tika-server-${TIKA_VERSION}.jar&action=download" \
+    ARCHIVE_TIKA_SERVER_URL="https://archive.apache.org/dist/tika/tika-server-${TIKA_VERSION}.jar" \
+    DEFAULT_TIKA_SERVER_ASC_URL="https://www.apache.org/dist/tika/tika-server-${TIKA_VERSION}.jar.asc" \
+    ARCHIVE_TIKA_SERVER_ASC_URL="https://archive.apache.org/dist/tika/tika-server-${TIKA_VERSION}.jar.asc" \
+    TIKA_VERSION=$TIKA_VERSION
+# The package index was removed in the dependencies stage, so it is refreshed here. Each download tries its primary URL, then
+# archive.apache.org; the braces make that grouping explicit. gpg --verify runs last, so a jar that fails verification fails the build.
+RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get -y install gnupg2 curl wget \
+    && curl -sSL https://www.apache.org/dist/tika/KEYS | gpg --import \
+    && echo "Nearest mirror: $NEAREST_TIKA_SERVER_URL" \
+    && { wget "$NEAREST_TIKA_SERVER_URL" -O /tika-server-${TIKA_VERSION}.jar \
+        || wget "$ARCHIVE_TIKA_SERVER_URL" -O /tika-server-${TIKA_VERSION}.jar; } \
+    && { wget "$DEFAULT_TIKA_SERVER_ASC_URL" -O /tika-server-${TIKA_VERSION}.jar.asc \
+        || wget "$ARCHIVE_TIKA_SERVER_ASC_URL" -O /tika-server-${TIKA_VERSION}.jar.asc; } \
+    && gpg --verify /tika-server-${TIKA_VERSION}.jar.asc /tika-server-${TIKA_VERSION}.jar
+
+FROM dependencies AS runtime
+ARG TIKA_VERSION
+ENV TIKA_VERSION=$TIKA_VERSION
+COPY --from=fetch_tika /tika-server-${TIKA_VERSION}.jar /tika-server-${TIKA_VERSION}.jar
+EXPOSE 9998
+# Drop root for the running server (arbitrary high numeric UID:GID); 9998 is an unprivileged port.
+USER 35002:35002
+# Exec form plus "exec": the shell only expands TIKA_VERSION, then java replaces it as PID 1 and receives SIGTERM on "docker stop".
+ENTRYPOINT ["/bin/sh", "-c", "exec java -jar /tika-server-${TIKA_VERSION}.jar -h 0.0.0.0"]
+LABEL maintainer="Apache Tika Developers dev@tika.apache.org"
diff --git a/minimal/Dockerfile b/minimal/Dockerfile
new file mode 100644
index 0000000..917ef38
--- /dev/null
+++ b/minimal/Dockerfile
@@ -0,0 +1,45 @@
+# Licensed under the Apache License, Version 2.0 (the "License"); you may not
+# use this file except in compliance with the License. You may obtain a copy of
+# the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations under
+# the License.
+FROM ubuntu:bionic AS base
+FROM base AS dependencies
+# Index refresh, install and cleanup share ONE layer: a separately cached "apt-get update" goes stale, and a later RUN cannot shrink an earlier layer.
+RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get -y install openjdk-11-jre-headless \
+    && apt-get clean -y && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
+
+FROM dependencies AS fetch_tika
+ARG TIKA_VERSION
+ENV NEAREST_TIKA_SERVER_URL="https://www.apache.org/dyn/closer.cgi/tika/tika-server-${TIKA_VERSION}.jar?filename=tika/tika-server-${TIKA_VERSION}.jar&action=download" \
+    ARCHIVE_TIKA_SERVER_URL="https://archive.apache.org/dist/tika/tika-server-${TIKA_VERSION}.jar" \
+    DEFAULT_TIKA_SERVER_ASC_URL="https://www.apache.org/dist/tika/tika-server-${TIKA_VERSION}.jar.asc" \
+    ARCHIVE_TIKA_SERVER_ASC_URL="https://archive.apache.org/dist/tika/tika-server-${TIKA_VERSION}.jar.asc" \
+    TIKA_VERSION=$TIKA_VERSION
+# The package index was removed in the dependencies stage, so it is refreshed here. Each download tries its primary URL, then
+# archive.apache.org; the braces make that grouping explicit. gpg --verify runs last, so a jar that fails verification fails the build.
+RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get -y install gnupg2 curl wget \
+    && curl -sSL https://www.apache.org/dist/tika/KEYS | gpg --import \
+    && echo "Nearest mirror: $NEAREST_TIKA_SERVER_URL" \
+    && { wget "$NEAREST_TIKA_SERVER_URL" -O /tika-server-${TIKA_VERSION}.jar \
+        || wget "$ARCHIVE_TIKA_SERVER_URL" -O /tika-server-${TIKA_VERSION}.jar; } \
+    && { wget "$DEFAULT_TIKA_SERVER_ASC_URL" -O /tika-server-${TIKA_VERSION}.jar.asc \
+        || wget "$ARCHIVE_TIKA_SERVER_ASC_URL" -O /tika-server-${TIKA_VERSION}.jar.asc; } \
+    && gpg --verify /tika-server-${TIKA_VERSION}.jar.asc /tika-server-${TIKA_VERSION}.jar
+
+FROM dependencies AS runtime
+ARG TIKA_VERSION
+ENV TIKA_VERSION=$TIKA_VERSION
+COPY --from=fetch_tika /tika-server-${TIKA_VERSION}.jar /tika-server-${TIKA_VERSION}.jar
+EXPOSE 9998
+# Drop root for the running server (arbitrary high numeric UID:GID); 9998 is an unprivileged port.
+USER 35002:35002
+# Exec form plus "exec": the shell only expands TIKA_VERSION, then java replaces it as PID 1 and receives SIGTERM on "docker stop".
+ENTRYPOINT ["/bin/sh", "-c", "exec java -jar /tika-server-${TIKA_VERSION}.jar -h 0.0.0.0"]
+LABEL maintainer="Apache Tika Developers dev@tika.apache.org"