You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2017/05/18 10:54:51 UTC
[tika] branch master updated: TIKA-2359: Alert user that tesseract
is available and will be used.
This is an automated email from the ASF dual-hosted git repository.
tallison pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/tika.git
The following commit(s) were added to refs/heads/master by this push:
new ebc87ae TIKA-2359: Alert user that tesseract is available and will be used.
ebc87ae is described below
commit ebc87aec539eef752072e95315daee65f7f42ebb
Author: tballison <ta...@mitre.org>
AuthorDate: Thu May 18 06:54:41 2017 -0400
TIKA-2359: Alert user that tesseract is available and will be used.
---
.../org/apache/tika/parser/ocr/TesseractOCRParser.java | 14 +++++++++++---
1 file changed, 11 insertions(+), 3 deletions(-)
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java b/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java
index 9728b38..121e096 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java
@@ -101,16 +101,24 @@ public class TesseractOCRParser extends AbstractParser {
MediaType.image("jpx"), MediaType.image("x-portable-pixmap")
})));
private static Map<String,Boolean> TESSERACT_PRESENT = new HashMap<>();
-
+ private static volatile boolean HAS_ALERTED = false;
@Override
public Set<MediaType> getSupportedTypes(ParseContext context) {
// If Tesseract is installed, offer our supported image types
TesseractOCRConfig config = context.get(TesseractOCRConfig.class, DEFAULT_CONFIG);
- if (hasTesseract(config))
+ if (hasTesseract(config)) {
+ if (! HAS_ALERTED) {
+ LOG.info("Tesseract OCR is installed and will be automatically applied to image files.\n"+
+ "This may dramatically slow down content extraction (TIKA-2359).\n"+
+ "As of Tika 1.15 (and prior versions), Tesseract is automatically called.\n"+
+ "In future versions of Tika, users may need to turn the TesseractOCRParser on via TikaConfig."
+ );
+ HAS_ALERTED = true;
+ }
return SUPPORTED_TYPES;
-
+ }
// Otherwise don't advertise anything, so the other image parsers
// can be selected instead
return Collections.emptySet();
--
To stop receiving notification emails like this one, please contact
commits@tika.apache.org.