You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ma...@apache.org on 2016/07/07 06:39:19 UTC

[06/16] tika git commit: updated Javadoc for Tesseract config and parser

updated Javadoc for Tesseract config and parser


Project: http://git-wip-us.apache.org/repos/asf/tika/repo
Commit: http://git-wip-us.apache.org/repos/asf/tika/commit/6773d42d
Tree: http://git-wip-us.apache.org/repos/asf/tika/tree/6773d42d
Diff: http://git-wip-us.apache.org/repos/asf/tika/diff/6773d42d

Branch: refs/heads/master
Commit: 6773d42de77230dff621a1010ed37f0505dfa302
Parents: bc6667c
Author: Zarana Parekh <za...@gmail.com>
Authored: Thu Jun 30 15:58:12 2016 -0700
Committer: Zarana Parekh <za...@gmail.com>
Committed: Thu Jun 30 15:58:12 2016 -0700

----------------------------------------------------------------------
 .../tika/parser/ocr/TesseractOCRConfig.java     | 32 ++++++++++++--------
 .../tika/parser/ocr/TesseractOCRParser.java     | 13 +++++---
 2 files changed, 27 insertions(+), 18 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tika/blob/6773d42d/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRConfig.java
----------------------------------------------------------------------
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRConfig.java b/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRConfig.java
index 558a83d..101003f 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRConfig.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRConfig.java
@@ -256,12 +256,14 @@ public class TesseractOCRConfig implements Serializable{
 		this.timeout = timeout;
 	}
 
-	/** @see #setTimeout(int timeout)*/
+	/** @see #setTimeout(int timeout)
+	 * @return timeout value for Tesseract */
 	public int getTimeout() {
 		return timeout;
 	}
 	
-	/** @see #setEnableImageProcessing(boolean) */
+	/** @see #setEnableImageProcessing(boolean)
+	 * @return image processing is enabled or not */
 	public int isEnableImageProcessing() {
 		return enableImageProcessing;
 	}
@@ -282,12 +284,12 @@ public class TesseractOCRConfig implements Serializable{
 	}
 
 	/**
-	 * @param density the density to set
+	 * @param density the density to set. Valid range of values is 150-1200.
 	 * Default value is 300.
 	 */
 	public void setDensity(int density) {
 		if(density < 150 || density > 1200) {
-			throw new IllegalArgumentException("Invalid density value");
+			throw new IllegalArgumentException("Invalid density value. Valid range of values is 150-1200.");
 		}
 		this.density = density;
 	}
@@ -300,7 +302,7 @@ public class TesseractOCRConfig implements Serializable{
 	}
 
 	/**
-	 * @param depth the depth to set
+	 * @param depth the depth to set. Valid values are 2, 4, 8, 16, 32, 64, 256, 4096.
 	 * Default value is 4.
 	 */
 	public void setDepth(int depth) {
@@ -311,7 +313,7 @@ public class TesseractOCRConfig implements Serializable{
 				return;
 			}
 		}
-		throw new IllegalArgumentException("Invalid depth value");
+		throw new IllegalArgumentException("Invalid depth value. Valid values are 2, 4, 8, 16, 32, 64, 256, 4096.");
 	}
 
 	/**
@@ -329,7 +331,7 @@ public class TesseractOCRConfig implements Serializable{
 		if(!colorspace.equals(null)) {
 			this.colorspace = colorspace;
 		} else {
-			throw new IllegalArgumentException("Invalid colorspace value");
+			throw new IllegalArgumentException("Colorspace value cannot be null.");
 		}
 	}
 
@@ -341,12 +343,13 @@ public class TesseractOCRConfig implements Serializable{
 	}
 
 	/**
-	 * @param filter the filter to set
+	 * @param filter the filter to set. Valid values are point, hermite, cubic, box, gaussian, catrom, triangle, quadratic and mitchell.
 	 * Default value is triangle.
 	 */
 	public void setFilter(String filter) {
 		if(filter.equals(null)) {
-			throw new IllegalArgumentException("Invalid filter value");
+			throw new IllegalArgumentException("Filter value cannot be null. Valid values are point, hermite, "
+					+ "cubic, box, gaussian, catrom, triangle, quadratic and mitchell.");
 		}
 		
 		String[] allowedFilters = {"Point", "Hermite", "Cubic", "Box", "Gaussian", "Catrom", "Triangle", "Quadratic", "Mitchell"};
@@ -356,7 +359,8 @@ public class TesseractOCRConfig implements Serializable{
 				return;
 			}
 		}
-		throw new IllegalArgumentException("Invalid filter value");
+		throw new IllegalArgumentException("Invalid filter value. Valid values are point, hermite, "
+					+ "cubic, box, gaussian, catrom, triangle, quadratic and mitchell.");
 	}
 
 	/**
@@ -367,7 +371,7 @@ public class TesseractOCRConfig implements Serializable{
 	}
 
 	/**
-	 * @param resize the resize to set
+	 * @param resize the resize to set. Valid range of values is 100-900.
 	 * Default value is 900.
 	 */
 	public void setResize(int resize) {
@@ -377,10 +381,11 @@ public class TesseractOCRConfig implements Serializable{
 				return;
 			}
 		}
-		throw new IllegalArgumentException("Invalid resize value");
+		throw new IllegalArgumentException("Invalid resize value. Valid range of values is 100-900.");
 	}
 
-	/** @see #setImageMagickPath(String ImageMagickPath)*/
+	/** @see #setImageMagickPath(String ImageMagickPath)
+	 * @return path to ImageMagick file. */
 	public String getImageMagickPath() {
 		
 		return ImageMagickPath;
@@ -388,6 +393,7 @@ public class TesseractOCRConfig implements Serializable{
 	
 	/**
 	 * Set the path to the ImageMagick executable, needed if it is not on system path.
+	 * @param path to ImageMagick file.
 	 */
 	public void setImageMagickPath(String ImageMagickPath) {
 		if(!ImageMagickPath.isEmpty() && !ImageMagickPath.endsWith(File.separator))

http://git-wip-us.apache.org/repos/asf/tika/blob/6773d42d/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java
----------------------------------------------------------------------
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java b/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java
index c2ef1ee..ae67425 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java
@@ -137,7 +137,7 @@ public class TesseractOCRParser extends AbstractParser {
      
     }
     
-    public boolean hasImageMagick(TesseractOCRConfig config) {
+    private boolean hasImageMagick(TesseractOCRConfig config) {
         // Fetch where the config says to find ImageMagick Program
         String ImageMagick = config.getImageMagickPath() + getImageMagickProg();
 
@@ -155,7 +155,7 @@ public class TesseractOCRParser extends AbstractParser {
      
     }
     
-    public boolean hasPython() {
+    private boolean hasPython() {
     	// check if python is installed and if the rotation program path has been specified correctly
         
     	boolean hasPython = false;
@@ -261,8 +261,8 @@ public class TesseractOCRParser extends AbstractParser {
      * This method is used to process the image to an OCR-friendly format.
      * @param streamingObject input image to be processed
      * @param config TesseractOCRconfig class to get ImageMagick properties
-     * @throws IOException
-     * @throws TikaException
+     * @throws IOException if an input error occurred
+     * @throws TikaException if an exception timed out
      */
     private void processImage(File streamingObject, TesseractOCRConfig config) throws IOException, TikaException {
     	
@@ -292,7 +292,10 @@ public class TesseractOCRParser extends AbstractParser {
         }
               
         // process the image - parameter values can be set in TesseractOCRConfig.properties
-    	String line = "convert -density " + config.getDensity() + " -depth " + config.getDepth() + " -colorspace " + config.getColorspace() +  " -filter " + config.getFilter() + " -resize " + config.getResize() + "% -rotate "+ angle + " " + streamingObject.getAbsolutePath() + " " + streamingObject.getAbsolutePath();    	
+    	String line = "convert -density " + config.getDensity() + " -depth " + config.getDepth() + 
+    			" -colorspace " + config.getColorspace() +  " -filter " + config.getFilter() + 
+    			" -resize " + config.getResize() + "% -rotate "+ angle + " " + streamingObject.getAbsolutePath() + 
+    			" " + streamingObject.getAbsolutePath();    	
         cmdLine = CommandLine.parse(line);
 		try {
 			executor.execute(cmdLine);