You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2024/03/21 14:08:11 UTC
(tika) branch main updated: TIKA-4216 (#1673)
This is an automated email from the ASF dual-hosted git repository.
tallison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git
The following commit(s) were added to refs/heads/main by this push:
new 237e73f18 TIKA-4216 (#1673)
237e73f18 is described below
commit 237e73f18f46af8322a910178fa8ed99e3710d8f
Author: Tim Allison <ta...@apache.org>
AuthorDate: Thu Mar 21 10:08:05 2024 -0400
TIKA-4216 (#1673)
* TIKA-4216 -- Avoid checking for imagemagick if image processing is disabled
---
.../apache/tika/parser/ocr/TesseractOCRParser.java | 20 +++++++++++++++++---
1 file changed, 17 insertions(+), 3 deletions(-)
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-ocr-module/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-ocr-module/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java
index a79e05b1d..a28ae8951 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-ocr-module/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-ocr-module/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java
@@ -126,6 +126,8 @@ public class TesseractOCRParser extends AbstractExternalProcessParser implements
})));
private static volatile boolean HAS_WARNED = false;
+ private static volatile boolean HAS_CHECKED_FOR_IMAGE_MAGICK = false;
+
//if a user specifies a custom tess path or tessdata path
//load the available languages at initialization time
private final Set<String> langs = new HashSet<>();
@@ -190,7 +192,10 @@ public class TesseractOCRParser extends AbstractExternalProcessParser implements
return hasTesseract;
}
- boolean hasImageMagick() throws TikaConfigException {
+ synchronized boolean hasImageMagick() throws TikaConfigException {
+ if (HAS_CHECKED_FOR_IMAGE_MAGICK) {
+ return hasImageMagick;
+ }
// Fetch where the config says to find ImageMagick Program
String fullImageMagickPath = imageMagickPath + getImageMagickProg();
@@ -208,7 +213,7 @@ public class TesseractOCRParser extends AbstractExternalProcessParser implements
LOG.debug("ImageMagick does not appear to be installed " + "(commandline: " +
fullImageMagickPath + ")");
}
-
+ HAS_CHECKED_FOR_IMAGE_MAGICK = true;
return hasImageMagick;
}
@@ -245,6 +250,11 @@ public class TesseractOCRParser extends AbstractExternalProcessParser implements
return;
}
+ //initialize() skips the imagemagick check when image preprocessing is off by default;
+ //if it has not been checked yet and this per file config enables preprocessing, check now
+ if (! HAS_CHECKED_FOR_IMAGE_MAGICK && config.isEnableImagePreprocessing()) {
+ hasImageMagick = hasImageMagick();
+ }
try (TemporaryResources tmp = new TemporaryResources()) {
TikaInputStream tikaStream = TikaInputStream.get(stream, tmp, metadata);
@@ -528,7 +538,11 @@ public class TesseractOCRParser extends AbstractExternalProcessParser implements
@Override
public void initialize(Map<String, Param> params) throws TikaConfigException {
hasTesseract = hasTesseract();
- hasImageMagick = hasImageMagick();
+ if (isEnableImagePreprocessing()) {
+ hasImageMagick = hasImageMagick();
+ } else {
+ hasImageMagick = false;
+ }
if (preloadLangs) {
preloadLangs();
if (!StringUtils.isBlank(defaultConfig.getLanguage())) {