You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ma...@apache.org on 2016/07/07 06:39:16 UTC
[03/16] tika git commit: optional processing enabled
optional processing enabled
Project: http://git-wip-us.apache.org/repos/asf/tika/repo
Commit: http://git-wip-us.apache.org/repos/asf/tika/commit/b10f2506
Tree: http://git-wip-us.apache.org/repos/asf/tika/tree/b10f2506
Diff: http://git-wip-us.apache.org/repos/asf/tika/diff/b10f2506
Branch: refs/heads/master
Commit: b10f2506939a1c62648e2dd7b1094e42f840541a
Parents: de84d71
Author: Zarana Parekh <za...@gmail.com>
Authored: Mon Jun 27 18:22:56 2016 -0700
Committer: Zarana Parekh <za...@gmail.com>
Committed: Mon Jun 27 18:22:56 2016 -0700
----------------------------------------------------------------------
tika-bundle/pom.xml | 2 --
tika-parsers/pom.xml | 2 --
.../tika/parser/ocr/TesseractOCRConfig.java | 18 ++++++++++++++++++
.../tika/parser/ocr/TesseractOCRParser.java | 2 +-
.../tika/parser/ocr/TesseractOCRConfig.properties | 2 ++
5 files changed, 21 insertions(+), 5 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/tika/blob/b10f2506/tika-bundle/pom.xml
----------------------------------------------------------------------
diff --git a/tika-bundle/pom.xml b/tika-bundle/pom.xml
index e94b43f..e993c4b 100644
--- a/tika-bundle/pom.xml
+++ b/tika-bundle/pom.xml
@@ -112,7 +112,6 @@
</dependencies>
<build>
- <pluginManagement>
<plugins>
<plugin>
<groupId>org.apache.felix</groupId>
@@ -427,7 +426,6 @@
</configuration>
</plugin>
</plugins>
- </pluginManagement>
</build>
<organization>
http://git-wip-us.apache.org/repos/asf/tika/blob/b10f2506/tika-parsers/pom.xml
----------------------------------------------------------------------
diff --git a/tika-parsers/pom.xml b/tika-parsers/pom.xml
index cab385e..fc1107c 100644
--- a/tika-parsers/pom.xml
+++ b/tika-parsers/pom.xml
@@ -509,7 +509,6 @@
</file>
</activation>
<build>
- <pluginManagement>
<plugins>
<plugin>
<groupId>org.codehaus.gmaven</groupId>
@@ -540,7 +539,6 @@
</executions>
</plugin>
</plugins>
- </pluginManagement>
</build>
</profile>
</profiles>
http://git-wip-us.apache.org/repos/asf/tika/blob/b10f2506/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRConfig.java
----------------------------------------------------------------------
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRConfig.java b/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRConfig.java
index d660142..0e553b6 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRConfig.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRConfig.java
@@ -62,6 +62,9 @@ public class TesseractOCRConfig implements Serializable{
// Maximum time (seconds) to wait for the ocring process termination
private int timeout = 120;
+
+ // enable image processing (optional): 1 = enabled, 0 = disabled (default)
+ private int enableProcessing = 0;
// Path to ImageMagick program, if not on system path.
private String ImageMagickPath = "";
@@ -134,6 +137,8 @@ public class TesseractOCRConfig implements Serializable{
getProp(props, "timeout", getTimeout()));
// set parameters for ImageMagick
+ setEnableProcessing(
+ getProp(props, "enableProcessing", isEnableProcessing()));
setImageMagickPath(
getProp(props, "ImageMagickPath", getImageMagickPath()));
setDensity(
@@ -256,6 +261,19 @@ public class TesseractOCRConfig implements Serializable{
return timeout;
}
+ /** Returns the flag set by {@link #setEnableProcessing(int)}; the parser treats 1 as enabled. */
+ public int isEnableProcessing() {
+ return enableProcessing;
+ }
+
+ /**
+ * Set the value to 1 to enable the optional image processing step; any other value leaves it disabled.
+ * Default value is 0 (disabled).
+ */
+ public void setEnableProcessing(int enableProcessing) {
+ this.enableProcessing = enableProcessing;
+ }
+
/**
* @return the density
*/
http://git-wip-us.apache.org/repos/asf/tika/blob/b10f2506/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java
----------------------------------------------------------------------
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java b/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java
index dae4a64..dbecb71 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java
@@ -320,7 +320,7 @@ public class TesseractOCRParser extends AbstractParser {
FileUtils.copyFile(input, tmpFile);
// Process image if ImageMagick Tool is present
- if(hasImageMagick(config)) {
+ if(config.isEnableProcessing() == 1 && hasImageMagick(config)) {
processImage(tmpFile,config);
}
http://git-wip-us.apache.org/repos/asf/tika/blob/b10f2506/tika-parsers/src/main/resources/org/apache/tika/parser/ocr/TesseractOCRConfig.properties
----------------------------------------------------------------------
diff --git a/tika-parsers/src/main/resources/org/apache/tika/parser/ocr/TesseractOCRConfig.properties b/tika-parsers/src/main/resources/org/apache/tika/parser/ocr/TesseractOCRConfig.properties
index a0a0b54..e18c229 100644
--- a/tika-parsers/src/main/resources/org/apache/tika/parser/ocr/TesseractOCRConfig.properties
+++ b/tika-parsers/src/main/resources/org/apache/tika/parser/ocr/TesseractOCRConfig.properties
@@ -22,6 +22,8 @@ minFileSizeToOcr=0
timeout=120
# properties for image processing
+# to enable processing, set enableProcessing to 1
+enableProcessing=0
ImageMagickPath=
density=300
depth=4