You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ni...@apache.org on 2015/01/07 15:29:35 UTC
svn commit: r1650067 -
/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java
Author: nick
Date: Wed Jan 7 14:29:35 2015
New Revision: 1650067
URL: http://svn.apache.org/r1650067
Log:
TIKA-1445 Cache whether Tesseract is present at a given path
Modified:
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java
Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java?rev=1650067&r1=1650066&r2=1650067&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java Wed Jan 7 14:29:35 2015
@@ -28,6 +28,7 @@ import java.io.InputStreamReader;
import java.io.Reader;
import java.util.Arrays;
import java.util.Collections;
+import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
@@ -81,6 +82,7 @@ public class TesseractOCRParser extends
MediaType.image("png"), MediaType.image("jpeg"), MediaType.image("tiff"),
MediaType.image("x-ms-bmp"), MediaType.image("gif")
})));
+ private static Map<String,Boolean> TESSERACT_PRESENT = new HashMap<String, Boolean>();
@Override
public Set<MediaType> getSupportedTypes(ParseContext context) {
@@ -102,8 +104,19 @@ public class TesseractOCRParser extends
}
private boolean hasTesseract(TesseractOCRConfig config) {
- String[] checkCmd = { config.getTesseractPath() + getTesseractProg() };
- return ExternalParser.check(checkCmd);
+ // Fetch where the config says to find Tesseract
+ String tesseract = config.getTesseractPath() + getTesseractProg();
+
+ // Have we already checked for a copy of Tesseract there?
+ if (TESSERACT_PRESENT.containsKey(tesseract)) {
+ return TESSERACT_PRESENT.get(tesseract);
+ }
+
+ // Try running Tesseract from there, and see if it exists + works
+ String[] checkCmd = { tesseract };
+ boolean hasTesseract = ExternalParser.check(checkCmd);
+ TESSERACT_PRESENT.put(tesseract, hasTesseract);
+ return hasTesseract;
}
public void parse(Image image, ContentHandler handler, Metadata metadata, ParseContext context) throws IOException,