You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by dm...@apache.org on 2018/01/14 23:07:43 UTC

[tika] branch branch_1x updated: TIKA-2509: Updated TesseractOCRParser to use configured ImageMagick path

This is an automated email from the ASF dual-hosted git repository.

dmeikle pushed a commit to branch branch_1x
in repository https://gitbox.apache.org/repos/asf/tika.git


The following commit(s) were added to refs/heads/branch_1x by this push:
     new 2922511  TIKA-2509: Updated TesseractOCRParser to use configured ImageMagick path
2922511 is described below

commit 2922511b5d1662654921a2e02599324aae4a84f4
Author: David Meikle <da...@meikle.io>
AuthorDate: Sun Jan 14 23:07:34 2018 +0000

    TIKA-2509: Updated TesseractOCRParser to use configured ImageMagick path
---
 CHANGES.txt                                                    |  3 +++
 .../java/org/apache/tika/parser/ocr/TesseractOCRParser.java    | 10 +++++++---
 2 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/CHANGES.txt b/CHANGES.txt
index 5e5789b..2e6d3a9 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -2,6 +2,9 @@ Release 1.18 - ???
 
    * Add a parser for XPS (TIKA-2524).
 
+   * Fixed bug where TesseractOCRParser ignores configured ImageMagickPath,
+     and set rotation script to ignore Python warnings (TIKA-2509)
+
 Release 1.17 - December 8, 2017
 
   ***NOTE: THIS IS THE LAST VERSION OF TIKA THAT WILL RUN
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java b/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java
index 1bf229b..08847fd 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java
@@ -154,7 +154,7 @@ public class TesseractOCRParser extends AbstractParser implements Initializable
     
     private boolean hasImageMagick(TesseractOCRConfig config) {
         // Fetch where the config says to find ImageMagick Program
-        String ImageMagick = config.getImageMagickPath() + getImageMagickProg();
+        String ImageMagick = getImageMagickPath(config);
 
         // Have we already checked for a copy of ImageMagick Program there?
         if (TESSERACT_PRESENT.containsKey(ImageMagick)) {
@@ -170,6 +170,10 @@ public class TesseractOCRParser extends AbstractParser implements Initializable
      
     }
 
+    private String getImageMagickPath(TesseractOCRConfig config) {
+        return config.getImageMagickPath() + getImageMagickProg();
+    }
+
     static boolean hasPython() {
         // check if python is installed and it has the required dependencies for the rotation program to run
         boolean hasPython = false;
@@ -319,7 +323,7 @@ public class TesseractOCRParser extends AbstractParser implements Initializable
     	File rotationScript = tmp.createTemporaryFile();
     	Files.copy(in, rotationScript.toPath(), StandardCopyOption.REPLACE_EXISTING);
     	
-    	String cmd = "python " + rotationScript.getAbsolutePath() + " -f " + streamingObject.getAbsolutePath();
+    	String cmd = "python -W ignore " + rotationScript.getAbsolutePath() + " -f " + streamingObject.getAbsolutePath();
     	String angle = "0"; 
     			
     	DefaultExecutor executor = new DefaultExecutor();
@@ -339,7 +343,7 @@ public class TesseractOCRParser extends AbstractParser implements Initializable
         }
               
         // process the image - parameter values can be set in TesseractOCRConfig.properties
-    	String line = "convert -density " + config.getDensity() + " -depth " + config.getDepth() + 
+    	String line = getImageMagickPath(config) + " -density " + config.getDensity() + " -depth " + config.getDepth() +
     			" -colorspace " + config.getColorspace() +  " -filter " + config.getFilter() + 
     			" -resize " + config.getResize() + "% -rotate "+ angle + " " + streamingObject.getAbsolutePath() + 
     			" " + streamingObject.getAbsolutePath();    	

-- 
To stop receiving notification emails like this one, please contact
"commits@tika.apache.org" <co...@tika.apache.org>.