You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@tika.apache.org by GitBox <gi...@apache.org> on 2021/01/01 16:25:51 UTC

[GitHub] [tika-docker] dameikle commented on a change in pull request #2: set tesseract ocr langauges as docker build args

dameikle commented on a change in pull request #2:
URL: https://github.com/apache/tika-docker/pull/2#discussion_r550782944



##########
File path: docker-tool.sh
##########
@@ -58,13 +60,18 @@ test_docker_image() {
 shift $((OPTIND -1))
 subcommand=$1; shift
 version=$1; shift
+tesseract_languages=$1; shift
 
 case "$subcommand" in
   build)
+    build_args="--build-arg TIKA_VERSION=${version}"
+    if [[ ! -z "$tesseract_languages" ]]; then
+      build_args="$build_args --build-arg TESSERACT_LANGUAGES='${tesseract_languages}'"
+    fi
     # Build slim version with minimal dependencies
     docker build -t apache/tika:${version} --build-arg TIKA_VERSION=${version} - < minimal/Dockerfile --no-cache
     # Build full version with OCR, Fonts and GDAL
-    docker build -t apache/tika:${version}-full --build-arg TIKA_VERSION=${version} - < full/Dockerfile --no-cache
+    docker build -t apache/tika:${version}-full ${build_args} - < full/Dockerfile --no-cache

Review comment:
       Not sure how you are proposing to call this from docker-tool.sh?
   
   Calling like this:
   
   `./docker-tool.sh build 1.25 tesseract-ocr-eng tesseract-ocr-ita`
   
   Will only get the first item.
   
   Calling like this:
   
   `./docker-tool.sh build 1.25 'tesseract-ocr-eng tesseract-ocr-ita'`
   
   Will break due to escaping on execution
   
   

##########
File path: docker-tool.sh
##########
@@ -21,11 +21,13 @@ while getopts ":h" opt; do
   case ${opt} in
     h )
       echo "Usage:"
-      echo "    docker-tool.sh -h                      Display this help message."
-      echo "    docker-tool.sh build <TIKA_VERSION>    Builds images for <TIKA_VERSION>."
-      echo "    docker-tool.sh test <TIKA_VERSION>     Tests images for <TIKA_VERSION>."
-      echo "    docker-tool.sh publish <TIKA_VERSION>  Publishes images for <TIKA_VERSION> to Docker Hub."
-      echo "    docker-tool.sh latest <TIKA_VERSION>   Tags images for <TIKA_VERSION> as latest on Docker Hub."
+      echo "    docker-tool.sh -h                                              Display this help message."
+      echo "    docker-tool.sh build <TIKA_VERSION> ['<TESSERACT_LANGUAGES>']  Builds images for <TIKA_VERSION> via special [<TESSERACT_LANGUAGES>]."
+      echo "    docker-tool.sh test <TIKA_VERSION>                             Tests images for <TIKA_VERSION>."
+      echo "    docker-tool.sh publish <TIKA_VERSION>                          Publishes images for <TIKA_VERSION> to Docker Hub."
+      echo "    docker-tool.sh latest <TIKA_VERSION>                           Tags images for <TIKA_VERSION> as latest on Docker Hub."
+      echo ""
+      ecgi "Note: [<TESSERACT_LANGUAGES>] is optional for full image, if you want to change default `tesseract-ocr` installation languages."

Review comment:
       Should this be _echo_?

##########
File path: docker-tool.sh
##########
@@ -21,11 +21,13 @@ while getopts ":h" opt; do
   case ${opt} in
     h )
       echo "Usage:"
-      echo "    docker-tool.sh -h                      Display this help message."
-      echo "    docker-tool.sh build <TIKA_VERSION>    Builds images for <TIKA_VERSION>."
-      echo "    docker-tool.sh test <TIKA_VERSION>     Tests images for <TIKA_VERSION>."
-      echo "    docker-tool.sh publish <TIKA_VERSION>  Publishes images for <TIKA_VERSION> to Docker Hub."
-      echo "    docker-tool.sh latest <TIKA_VERSION>   Tags images for <TIKA_VERSION> as latest on Docker Hub."
+      echo "    docker-tool.sh -h                                              Display this help message."
+      echo "    docker-tool.sh build <TIKA_VERSION> ['<TESSERACT_LANGUAGES>']  Builds images for <TIKA_VERSION> via special [<TESSERACT_LANGUAGES>]."
+      echo "    docker-tool.sh test <TIKA_VERSION>                             Tests images for <TIKA_VERSION>."
+      echo "    docker-tool.sh publish <TIKA_VERSION>                          Publishes images for <TIKA_VERSION> to Docker Hub."
+      echo "    docker-tool.sh latest <TIKA_VERSION>                           Tags images for <TIKA_VERSION> as latest on Docker Hub."
+      echo ""
+      ecgi "Note: [<TESSERACT_LANGUAGES>] is optional for full image, if you want to change default `tesseract-ocr` installation languages."

Review comment:
       The backticks around _tesseract-ocr_ need to be escaped; in the current unescaped form, the shell will execute it as a command.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org