You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by dm...@apache.org on 2020/11/30 21:50:40 UTC

[tika-docker] 01/28: Initial version for testing

This is an automated email from the ASF dual-hosted git repository.

dmeikle pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/tika-docker.git

commit 18bb384b14f3232b58c0b3d7e00e32abb377cb63
Author: David Meikle <dm...@apache.org>
AuthorDate: Thu Jan 9 02:10:24 2020 +0000

    Initial version for testing
---
 .gitignore         | 27 ++++++++++++++++++++++
 .travis.ci.yml     |  9 ++++++++
 README.md          |  3 +++
 docker-tool.sh     | 68 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 full/Dockerfile    | 50 +++++++++++++++++++++++++++++++++++++++
 minimal/Dockerfile | 45 ++++++++++++++++++++++++++++++++++++
 6 files changed, 202 insertions(+)

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..dda6180
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,27 @@
+.svn
+target
+dependency-reduced-pom.xml
+.idea
+.classpath
+.project
+.settings
+*.iml
+*.ipr
+*.iws
+*.bin
+nbactions.xml
+nb-configuration.xml
+*.DS_Store
+*.tmp-inception
+*.snap
+.*.swp
+tika-deployment/tika-snap-app/parts/
+tika-deployment/tika-snap-app/prime/
+tika-deployment/tika-snap-app/snap/
+tika-deployment/tika-snap-app/stage/
+tika-deployment/tika-snap-app/test/
+tika-deployment/tika-snap-server/parts/
+tika-deployment/tika-snap-server/prime/
+tika-deployment/tika-snap-server/snap/
+tika-deployment/tika-snap-server/stage/
+
diff --git a/.travis.ci.yml b/.travis.ci.yml
new file mode 100644
index 0000000..913a0bc
--- /dev/null
+++ b/.travis.ci.yml
@@ -0,0 +1,9 @@
+language: bash
+services: docker
+env:
+  matrix:  # one CI job per Tika release to build and smoke-test
+    - VERSION=1.23
+    - VERSION=1.22
+script:  # docker-tool.sh sits in the repository root, which is not on PATH, hence the ./ prefix
+  - ./docker-tool.sh build $VERSION
+  - ./docker-tool.sh test $VERSION
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..773b484
--- /dev/null
+++ b/README.md
@@ -0,0 +1,3 @@
+# tika-docker
+
+Work in Progress
\ No newline at end of file
diff --git a/docker-tool.sh b/docker-tool.sh
new file mode 100755
index 0000000..39e02c3
--- /dev/null
+++ b/docker-tool.sh
@@ -0,0 +1,68 @@
+#!/usr/bin/env bash
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+#   Unless required by applicable law or agreed to in writing,
+#   software distributed under the License is distributed on an
+#   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+#   KIND, either express or implied.  See the License for the
+#   specific language governing permissions and limitations
+#   under the License.
+
+while getopts ":h" flag; do  # leading ':' = silent mode: unknown options arrive as '?' with the letter in OPTARG
+  case "${flag}" in
+    h)  # print the usage summary and stop successfully
+      printf '%s\n' "Usage:" \
+        "    docker-tool.sh -h                      Display this help message." \
+        "    docker-tool.sh build <TIKA_VERSION>    Builds image(s) for <TIKA_VERSION>." \
+        "    docker-tool.sh test <TIKA_VERSION>     Tests image(s) for <TIKA_VERSION>." \
+        "    docker-tool.sh publish <TIKA_VERSION>  Publishes image(s) for <TIKA_VERSION> to Docker Hub."
+      exit 0
+      ;;
+    \?)  # any option other than -h is rejected on stderr
+      printf '%s\n' "Invalid Option: -$OPTARG" >&2
+      exit 1
+      ;;
+  esac
+done
+
+test_docker_image() {  # $1 = image tag (also the container name); returns curl's status so a failed probe fails the test
+     docker run -d --name "$1" -p 9998:9998 apache/tika:"$1"
+     sleep 10  # fixed grace period for the server to start listening
+     local status=0; curl --fail http://localhost:9998/version || status=$?  # --fail: HTTP error responses count as failures
+     docker rm --force "$1"  # always clean up: kills the container if still running, then removes it
+     return "$status"
+}
+
+shift $((OPTIND -1))
+subcommand=$1; shift
+version=$1; shift
+
+case "$subcommand" in
+  build)
+    # Build slim version with minimal dependencies
+    docker build -t apache/tika:${version} --build-arg TIKA_VERSION=${version} - < minimal/Dockerfile
+    # Build full version with OCR, Fonts and GDAL
+    docker build -t apache/tika:${version}-full --build-arg TIKA_VERSION=${version} - < full/Dockerfile
+    ;;
+
+  test)
+    # Test minimal image
+    test_docker_image ${version}
+    # Test full image
+    test_docker_image "${version}-full"
+    ;;
+
+  publish)
+    echo "Does nothing until we get Docker Hub access setup under Apache Organisation"
+    ;;
+
+esac
diff --git a/full/Dockerfile b/full/Dockerfile
new file mode 100644
index 0000000..a9ad964
--- /dev/null
+++ b/full/Dockerfile
@@ -0,0 +1,50 @@
+# Licensed under the Apache License, Version 2.0 (the "License"); you may not
+# use this file except in compliance with the License. You may obtain a copy of
+# the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations under
+# the License.
+FROM ubuntu:bionic AS base
+
+# Refresh the package index in the same layer as the install (a separately cached
+# "apt-get update" layer goes stale) and clean up there so the layer actually shrinks.
+FROM base AS dependencies
+RUN apt-get update \
+    && echo ttf-mscorefonts-installer msttcorefonts/accepted-mscorefonts-eula select true | debconf-set-selections \
+    && DEBIAN_FRONTEND=noninteractive apt-get -y install openjdk-11-jre-headless gdal-bin tesseract-ocr \
+        tesseract-ocr-eng tesseract-ocr-ita tesseract-ocr-fra tesseract-ocr-spa tesseract-ocr-deu \
+        curl xfonts-utils fonts-freefont-ttf fonts-liberation ttf-mscorefonts-installer wget cabextract \
+    && apt-get clean -y && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
+
+# Throwaway stage: fetch the server jar (nearest mirror first, archive as fallback)
+# and fail the build unless its detached GPG signature verifies against the Tika KEYS.
+FROM dependencies AS fetch_tika
+ARG TIKA_VERSION
+ENV NEAREST_TIKA_SERVER_URL="https://www.apache.org/dyn/closer.cgi/tika/tika-server-${TIKA_VERSION}.jar?filename=tika/tika-server-${TIKA_VERSION}.jar&action=download" \
+    ARCHIVE_TIKA_SERVER_URL="https://archive.apache.org/dist/tika/tika-server-${TIKA_VERSION}.jar" \
+    DEFAULT_TIKA_SERVER_ASC_URL="https://www.apache.org/dist/tika/tika-server-${TIKA_VERSION}.jar.asc" \
+    ARCHIVE_TIKA_SERVER_ASC_URL="https://archive.apache.org/dist/tika/tika-server-${TIKA_VERSION}.jar.asc" \
+    TIKA_VERSION=$TIKA_VERSION
+
+RUN apt-get update \
+    && DEBIAN_FRONTEND=noninteractive apt-get -y install gnupg2 curl wget \
+    && curl -fsSL https://www.apache.org/dist/tika/KEYS | gpg --import \
+    && echo "Nearest mirror: $NEAREST_TIKA_SERVER_URL" \
+    && { wget "$NEAREST_TIKA_SERVER_URL" -O "/tika-server-${TIKA_VERSION}.jar" || wget "$ARCHIVE_TIKA_SERVER_URL" -O "/tika-server-${TIKA_VERSION}.jar"; } \
+    && { wget "$DEFAULT_TIKA_SERVER_ASC_URL" -O "/tika-server-${TIKA_VERSION}.jar.asc" || wget "$ARCHIVE_TIKA_SERVER_ASC_URL" -O "/tika-server-${TIKA_VERSION}.jar.asc"; } \
+    && gpg --verify "/tika-server-${TIKA_VERSION}.jar.asc" "/tika-server-${TIKA_VERSION}.jar"
+
+FROM dependencies AS runtime
+ARG TIKA_VERSION
+ENV TIKA_VERSION=$TIKA_VERSION
+COPY --from=fetch_tika /tika-server-${TIKA_VERSION}.jar /tika-server-${TIKA_VERSION}.jar
+EXPOSE 9998
+# Exec form plus "exec" makes java PID 1 so "docker stop" signals reach it; "$@" forwards extra "docker run" arguments.
+ENTRYPOINT ["/bin/sh", "-c", "exec java -jar /tika-server-${TIKA_VERSION}.jar -h 0.0.0.0 \"$@\"", "tika-server"]
+
+LABEL maintainer="Apache Tika Developers dev@tika.apache.org"
diff --git a/minimal/Dockerfile b/minimal/Dockerfile
new file mode 100644
index 0000000..917ef38
--- /dev/null
+++ b/minimal/Dockerfile
@@ -0,0 +1,45 @@
+# Licensed under the Apache License, Version 2.0 (the "License"); you may not
+# use this file except in compliance with the License. You may obtain a copy of
+# the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations under
+# the License.
+FROM ubuntu:bionic AS base
+
+# Index refresh, install and cleanup share one layer: no stale cached index, and the cleanup really shrinks the image.
+FROM base AS dependencies
+RUN apt-get update \
+    && DEBIAN_FRONTEND=noninteractive apt-get -y install openjdk-11-jre-headless \
+    && apt-get clean -y && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
+
+# Throwaway stage: fetch the jar (nearest mirror, then archive) and fail unless its GPG signature verifies.
+FROM dependencies AS fetch_tika
+ARG TIKA_VERSION
+ENV NEAREST_TIKA_SERVER_URL="https://www.apache.org/dyn/closer.cgi/tika/tika-server-${TIKA_VERSION}.jar?filename=tika/tika-server-${TIKA_VERSION}.jar&action=download" \
+    ARCHIVE_TIKA_SERVER_URL="https://archive.apache.org/dist/tika/tika-server-${TIKA_VERSION}.jar" \
+    DEFAULT_TIKA_SERVER_ASC_URL="https://www.apache.org/dist/tika/tika-server-${TIKA_VERSION}.jar.asc" \
+    ARCHIVE_TIKA_SERVER_ASC_URL="https://archive.apache.org/dist/tika/tika-server-${TIKA_VERSION}.jar.asc" \
+    TIKA_VERSION=$TIKA_VERSION
+
+RUN apt-get update \
+    && DEBIAN_FRONTEND=noninteractive apt-get -y install gnupg2 curl wget \
+    && curl -fsSL https://www.apache.org/dist/tika/KEYS | gpg --import \
+    && echo "Nearest mirror: $NEAREST_TIKA_SERVER_URL" \
+    && { wget "$NEAREST_TIKA_SERVER_URL" -O "/tika-server-${TIKA_VERSION}.jar" || wget "$ARCHIVE_TIKA_SERVER_URL" -O "/tika-server-${TIKA_VERSION}.jar"; } \
+    && { wget "$DEFAULT_TIKA_SERVER_ASC_URL" -O "/tika-server-${TIKA_VERSION}.jar.asc" || wget "$ARCHIVE_TIKA_SERVER_ASC_URL" -O "/tika-server-${TIKA_VERSION}.jar.asc"; } \
+    && gpg --verify "/tika-server-${TIKA_VERSION}.jar.asc" "/tika-server-${TIKA_VERSION}.jar"
+
+FROM dependencies AS runtime
+ARG TIKA_VERSION
+ENV TIKA_VERSION=$TIKA_VERSION
+COPY --from=fetch_tika /tika-server-${TIKA_VERSION}.jar /tika-server-${TIKA_VERSION}.jar
+EXPOSE 9998
+# Exec form plus "exec" makes java PID 1 so "docker stop" signals reach it; "$@" forwards extra "docker run" arguments.
+ENTRYPOINT ["/bin/sh", "-c", "exec java -jar /tika-server-${TIKA_VERSION}.jar -h 0.0.0.0 \"$@\"", "tika-server"]
+
+LABEL maintainer="Apache Tika Developers dev@tika.apache.org"