You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ni...@apache.org on 2015/01/07 15:29:35 UTC

svn commit: r1650067 - /tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java

Author: nick
Date: Wed Jan  7 14:29:35 2015
New Revision: 1650067

URL: http://svn.apache.org/r1650067
Log:
TIKA-1445 Cache if Tesseract is present at a given path or not

Modified:
    tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java

Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java?rev=1650067&r1=1650066&r2=1650067&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java Wed Jan  7 14:29:35 2015
@@ -28,6 +28,7 @@ import java.io.InputStreamReader;
 import java.io.Reader;
 import java.util.Arrays;
 import java.util.Collections;
+import java.util.HashMap;
 import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
@@ -81,6 +82,7 @@ public class TesseractOCRParser extends
               MediaType.image("png"), MediaType.image("jpeg"), MediaType.image("tiff"),
               MediaType.image("x-ms-bmp"), MediaType.image("gif")
   })));
+  private static Map<String,Boolean> TESSERACT_PRESENT = new HashMap<String, Boolean>();
 
   @Override
   public Set<MediaType> getSupportedTypes(ParseContext context) {
@@ -102,8 +104,19 @@ public class TesseractOCRParser extends
   }
   
   private boolean hasTesseract(TesseractOCRConfig config) {
-      String[] checkCmd = { config.getTesseractPath() + getTesseractProg() };
-      return ExternalParser.check(checkCmd);
+      // Fetch where the config says to find Tesseract
+      String tesseract = config.getTesseractPath() + getTesseractProg();
+      
+      // Have we already checked for a copy of Tesseract there?
+      if (TESSERACT_PRESENT.containsKey(tesseract)) {
+          return TESSERACT_PRESENT.get(tesseract);
+      }
+      
+      // Try running Tesseract from there, and see if it exists + works
+      String[] checkCmd = { tesseract };
+      boolean hasTesseract = ExternalParser.check(checkCmd);
+      TESSERACT_PRESENT.put(tesseract, hasTesseract);
+      return hasTesseract;
   }
 
   public void parse(Image image, ContentHandler handler, Metadata metadata, ParseContext context) throws IOException,