You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ma...@apache.org on 2016/07/07 06:39:16 UTC

[03/16] tika git commit: optional processing enabled

optional processing enabled


Project: http://git-wip-us.apache.org/repos/asf/tika/repo
Commit: http://git-wip-us.apache.org/repos/asf/tika/commit/b10f2506
Tree: http://git-wip-us.apache.org/repos/asf/tika/tree/b10f2506
Diff: http://git-wip-us.apache.org/repos/asf/tika/diff/b10f2506

Branch: refs/heads/master
Commit: b10f2506939a1c62648e2dd7b1094e42f840541a
Parents: de84d71
Author: Zarana Parekh <za...@gmail.com>
Authored: Mon Jun 27 18:22:56 2016 -0700
Committer: Zarana Parekh <za...@gmail.com>
Committed: Mon Jun 27 18:22:56 2016 -0700

----------------------------------------------------------------------
 tika-bundle/pom.xml                               |  2 --
 tika-parsers/pom.xml                              |  2 --
 .../tika/parser/ocr/TesseractOCRConfig.java       | 18 ++++++++++++++++++
 .../tika/parser/ocr/TesseractOCRParser.java       |  2 +-
 .../tika/parser/ocr/TesseractOCRConfig.properties |  2 ++
 5 files changed, 21 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tika/blob/b10f2506/tika-bundle/pom.xml
----------------------------------------------------------------------
diff --git a/tika-bundle/pom.xml b/tika-bundle/pom.xml
index e94b43f..e993c4b 100644
--- a/tika-bundle/pom.xml
+++ b/tika-bundle/pom.xml
@@ -112,7 +112,6 @@
   </dependencies>
 
   <build>
-  	<pluginManagement>
 	    <plugins>
 	      <plugin>
 	        <groupId>org.apache.felix</groupId>
@@ -427,7 +426,6 @@
 	        </configuration>
 	      </plugin>
 	    </plugins>
-    </pluginManagement>
   </build>
 
   <organization>

http://git-wip-us.apache.org/repos/asf/tika/blob/b10f2506/tika-parsers/pom.xml
----------------------------------------------------------------------
diff --git a/tika-parsers/pom.xml b/tika-parsers/pom.xml
index cab385e..fc1107c 100644
--- a/tika-parsers/pom.xml
+++ b/tika-parsers/pom.xml
@@ -509,7 +509,6 @@
         </file>
       </activation>
       <build>
-      <pluginManagement>
         <plugins>
           <plugin>
             <groupId>org.codehaus.gmaven</groupId>
@@ -540,7 +539,6 @@
             </executions>
           </plugin>
         </plugins>
-        </pluginManagement>
       </build>
     </profile>
   </profiles>

http://git-wip-us.apache.org/repos/asf/tika/blob/b10f2506/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRConfig.java
----------------------------------------------------------------------
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRConfig.java b/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRConfig.java
index d660142..0e553b6 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRConfig.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRConfig.java
@@ -62,6 +62,9 @@ public class TesseractOCRConfig implements Serializable{
 
 	// Maximum time (seconds) to wait for the ocring process termination
 	private int timeout = 120;
+	
+	// Flag for optional ImageMagick image processing: 1 enables it, 0 (default) disables it
+	private int enableProcessing = 0;
 
 	// Path to ImageMagick program, if not on system path.
 	private String ImageMagickPath = "";
@@ -134,6 +137,8 @@ public class TesseractOCRConfig implements Serializable{
                 getProp(props, "timeout", getTimeout()));
 		
 		// set parameters for ImageMagick
+		setEnableProcessing(
+				getProp(props, "enableProcessing", isEnableProcessing()));
 		setImageMagickPath(
 				getProp(props, "ImageMagickPath", getImageMagickPath()));
 		setDensity(
@@ -256,6 +261,19 @@ public class TesseractOCRConfig implements Serializable{
 		return timeout;
 	}
 	
+	/** @see #setEnableProcessing(int) */
+	public int isEnableProcessing() {
+		return enableProcessing;
+	}
+
+	/** 
+	 * Set the value to 1 if processing is to be enabled.
+	 * Default value is 0 (processing disabled).
+	 */
+	public void setEnableProcessing(int enableProcessing) {
+		this.enableProcessing = enableProcessing;
+	}
+
 	/**
 	 * @return the density
 	 */

http://git-wip-us.apache.org/repos/asf/tika/blob/b10f2506/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java
----------------------------------------------------------------------
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java b/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java
index dae4a64..dbecb71 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java
@@ -320,7 +320,7 @@ public class TesseractOCRParser extends AbstractParser {
             	FileUtils.copyFile(input, tmpFile);
             	
             	// Process image if ImageMagick Tool is present
-            	if(hasImageMagick(config)) {
+            	if(config.isEnableProcessing() == 1 && hasImageMagick(config)) {
             		processImage(tmpFile,config);
             	}
             	

http://git-wip-us.apache.org/repos/asf/tika/blob/b10f2506/tika-parsers/src/main/resources/org/apache/tika/parser/ocr/TesseractOCRConfig.properties
----------------------------------------------------------------------
diff --git a/tika-parsers/src/main/resources/org/apache/tika/parser/ocr/TesseractOCRConfig.properties b/tika-parsers/src/main/resources/org/apache/tika/parser/ocr/TesseractOCRConfig.properties
index a0a0b54..e18c229 100644
--- a/tika-parsers/src/main/resources/org/apache/tika/parser/ocr/TesseractOCRConfig.properties
+++ b/tika-parsers/src/main/resources/org/apache/tika/parser/ocr/TesseractOCRConfig.properties
@@ -22,6 +22,8 @@ minFileSizeToOcr=0
 timeout=120
 
 # properties for image processing
+# to enable image processing, set enableProcessing to 1 (0 = disabled, the default)
+enableProcessing=0
 ImageMagickPath=
 density=300
 depth=4