You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by dm...@apache.org on 2018/01/14 23:07:43 UTC
[tika] branch branch_1x updated: TIKA-2509: Updated TesseractOCRParser to use configured ImageMagick path
This is an automated email from the ASF dual-hosted git repository.
dmeikle pushed a commit to branch branch_1x
in repository https://gitbox.apache.org/repos/asf/tika.git
The following commit(s) were added to refs/heads/branch_1x by this push:
new 2922511 TIKA-2509: Updated TesseractOCRParser to use configured ImageMagick path
2922511 is described below
commit 2922511b5d1662654921a2e02599324aae4a84f4
Author: David Meikle <da...@meikle.io>
AuthorDate: Sun Jan 14 23:07:34 2018 +0000
TIKA-2509: Updated TesseractOCRParser to use configured ImageMagick path
---
CHANGES.txt | 3 +++
.../java/org/apache/tika/parser/ocr/TesseractOCRParser.java | 10 +++++++---
2 files changed, 10 insertions(+), 3 deletions(-)
diff --git a/CHANGES.txt b/CHANGES.txt
index 5e5789b..2e6d3a9 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -2,6 +2,9 @@ Release 1.18 - ???
* Add a parser for XPS (TIKA-2524).
+ * Fixed bug where TesseractOCRParser ignores configured ImageMagickPath,
+ and set rotation script to ignore Python warnings (TIKA-2509)
+
Release 1.17 - December 8, 2017
***NOTE: THIS IS THE LAST VERSION OF TIKA THAT WILL RUN
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java b/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java
index 1bf229b..08847fd 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java
@@ -154,7 +154,7 @@ public class TesseractOCRParser extends AbstractParser implements Initializable
private boolean hasImageMagick(TesseractOCRConfig config) {
// Fetch where the config says to find ImageMagick Program
- String ImageMagick = config.getImageMagickPath() + getImageMagickProg();
+ String ImageMagick = getImageMagickPath(config);
// Have we already checked for a copy of ImageMagick Program there?
if (TESSERACT_PRESENT.containsKey(ImageMagick)) {
@@ -170,6 +170,10 @@ public class TesseractOCRParser extends AbstractParser implements Initializable
}
+ private String getImageMagickPath(TesseractOCRConfig config) {
+ return config.getImageMagickPath() + getImageMagickProg();
+ }
+
static boolean hasPython() {
// check if python is installed and it has the required dependencies for the rotation program to run
boolean hasPython = false;
@@ -319,7 +323,7 @@ public class TesseractOCRParser extends AbstractParser implements Initializable
File rotationScript = tmp.createTemporaryFile();
Files.copy(in, rotationScript.toPath(), StandardCopyOption.REPLACE_EXISTING);
- String cmd = "python " + rotationScript.getAbsolutePath() + " -f " + streamingObject.getAbsolutePath();
+ String cmd = "python -W ignore " + rotationScript.getAbsolutePath() + " -f " + streamingObject.getAbsolutePath();
String angle = "0";
DefaultExecutor executor = new DefaultExecutor();
@@ -339,7 +343,7 @@ public class TesseractOCRParser extends AbstractParser implements Initializable
}
// process the image - parameter values can be set in TesseractOCRConfig.properties
- String line = "convert -density " + config.getDensity() + " -depth " + config.getDepth() +
+ String line = getImageMagickPath(config) + " -density " + config.getDensity() + " -depth " + config.getDepth() +
" -colorspace " + config.getColorspace() + " -filter " + config.getFilter() +
" -resize " + config.getResize() + "% -rotate "+ angle + " " + streamingObject.getAbsolutePath() +
" " + streamingObject.getAbsolutePath();
--
To stop receiving notification emails like this one, please contact
"commits@tika.apache.org" <co...@tika.apache.org>.