You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ma...@apache.org on 2016/07/07 06:39:19 UTC
[06/16] tika git commit: updated Javadoc for Tesseract config and parser
updated Javadoc for Tesseract config and parser
Project: http://git-wip-us.apache.org/repos/asf/tika/repo
Commit: http://git-wip-us.apache.org/repos/asf/tika/commit/6773d42d
Tree: http://git-wip-us.apache.org/repos/asf/tika/tree/6773d42d
Diff: http://git-wip-us.apache.org/repos/asf/tika/diff/6773d42d
Branch: refs/heads/master
Commit: 6773d42de77230dff621a1010ed37f0505dfa302
Parents: bc6667c
Author: Zarana Parekh <za...@gmail.com>
Authored: Thu Jun 30 15:58:12 2016 -0700
Committer: Zarana Parekh <za...@gmail.com>
Committed: Thu Jun 30 15:58:12 2016 -0700
----------------------------------------------------------------------
.../tika/parser/ocr/TesseractOCRConfig.java | 32 ++++++++++++--------
.../tika/parser/ocr/TesseractOCRParser.java | 13 +++++---
2 files changed, 27 insertions(+), 18 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/tika/blob/6773d42d/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRConfig.java
----------------------------------------------------------------------
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRConfig.java b/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRConfig.java
index 558a83d..101003f 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRConfig.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRConfig.java
@@ -256,12 +256,14 @@ public class TesseractOCRConfig implements Serializable{
this.timeout = timeout;
}
- /** @see #setTimeout(int timeout)*/
+ /** @see #setTimeout(int timeout)
+ * @return timeout value for Tesseract */
public int getTimeout() {
return timeout;
}
- /** @see #setEnableImageProcessing(boolean) */
+ /** @see #setEnableImageProcessing(boolean)
+ * @return image processing is enabled or not */
public int isEnableImageProcessing() {
return enableImageProcessing;
}
@@ -282,12 +284,12 @@ public class TesseractOCRConfig implements Serializable{
}
/**
- * @param density the density to set
+ * @param density the density to set. Valid range of values is 150-1200.
* Default value is 300.
*/
public void setDensity(int density) {
if(density < 150 || density > 1200) {
- throw new IllegalArgumentException("Invalid density value");
+ throw new IllegalArgumentException("Invalid density value. Valid range of values is 150-1200.");
}
this.density = density;
}
@@ -300,7 +302,7 @@ public class TesseractOCRConfig implements Serializable{
}
/**
- * @param depth the depth to set
+ * @param depth the depth to set. Valid values are 2, 4, 8, 16, 32, 64, 256, 4096.
* Default value is 4.
*/
public void setDepth(int depth) {
@@ -311,7 +313,7 @@ public class TesseractOCRConfig implements Serializable{
return;
}
}
- throw new IllegalArgumentException("Invalid depth value");
+ throw new IllegalArgumentException("Invalid depth value. Valid values are 2, 4, 8, 16, 32, 64, 256, 4096.");
}
/**
@@ -329,7 +331,7 @@ public class TesseractOCRConfig implements Serializable{
if(!colorspace.equals(null)) {
this.colorspace = colorspace;
} else {
- throw new IllegalArgumentException("Invalid colorspace value");
+ throw new IllegalArgumentException("Colorspace value cannot be null.");
}
}
@@ -341,12 +343,13 @@ public class TesseractOCRConfig implements Serializable{
}
/**
- * @param filter the filter to set
+ * @param filter the filter to set. Valid values are point, hermite, cubic, box, gaussian, catrom, triangle, quadratic and mitchell.
* Default value is triangle.
*/
public void setFilter(String filter) {
if(filter.equals(null)) {
- throw new IllegalArgumentException("Invalid filter value");
+ throw new IllegalArgumentException("Filter value cannot be null. Valid values are point, hermite, "
+ + "cubic, box, gaussian, catrom, triangle, quadratic and mitchell.");
}
String[] allowedFilters = {"Point", "Hermite", "Cubic", "Box", "Gaussian", "Catrom", "Triangle", "Quadratic", "Mitchell"};
@@ -356,7 +359,8 @@ public class TesseractOCRConfig implements Serializable{
return;
}
}
- throw new IllegalArgumentException("Invalid filter value");
+ throw new IllegalArgumentException("Invalid filter value. Valid values are point, hermite, "
+ + "cubic, box, gaussian, catrom, triangle, quadratic and mitchell.");
}
/**
@@ -367,7 +371,7 @@ public class TesseractOCRConfig implements Serializable{
}
/**
- * @param resize the resize to set
+ * @param resize the resize to set. Valid range of values is 100-900.
* Default value is 900.
*/
public void setResize(int resize) {
@@ -377,10 +381,11 @@ public class TesseractOCRConfig implements Serializable{
return;
}
}
- throw new IllegalArgumentException("Invalid resize value");
+ throw new IllegalArgumentException("Invalid resize value. Valid range of values is 100-900.");
}
- /** @see #setImageMagickPath(String ImageMagickPath)*/
+ /** @see #setImageMagickPath(String ImageMagickPath)
+ * @return path to ImageMagick file. */
public String getImageMagickPath() {
return ImageMagickPath;
@@ -388,6 +393,7 @@ public class TesseractOCRConfig implements Serializable{
/**
* Set the path to the ImageMagick executable, needed if it is not on system path.
+ * @param ImageMagickPath path to the ImageMagick executable.
*/
public void setImageMagickPath(String ImageMagickPath) {
if(!ImageMagickPath.isEmpty() && !ImageMagickPath.endsWith(File.separator))
http://git-wip-us.apache.org/repos/asf/tika/blob/6773d42d/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java
----------------------------------------------------------------------
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java b/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java
index c2ef1ee..ae67425 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java
@@ -137,7 +137,7 @@ public class TesseractOCRParser extends AbstractParser {
}
- public boolean hasImageMagick(TesseractOCRConfig config) {
+ private boolean hasImageMagick(TesseractOCRConfig config) {
// Fetch where the config says to find ImageMagick Program
String ImageMagick = config.getImageMagickPath() + getImageMagickProg();
@@ -155,7 +155,7 @@ public class TesseractOCRParser extends AbstractParser {
}
- public boolean hasPython() {
+ private boolean hasPython() {
// check if python is installed and if the rotation program path has been specified correctly
boolean hasPython = false;
@@ -261,8 +261,8 @@ public class TesseractOCRParser extends AbstractParser {
* This method is used to process the image to an OCR-friendly format.
* @param streamingObject input image to be processed
* @param config TesseractOCRconfig class to get ImageMagick properties
- * @throws IOException
- * @throws TikaException
+ * @throws IOException if an input error occurred
+ * @throws TikaException if an exception timed out
*/
private void processImage(File streamingObject, TesseractOCRConfig config) throws IOException, TikaException {
@@ -292,7 +292,10 @@ public class TesseractOCRParser extends AbstractParser {
}
// process the image - parameter values can be set in TesseractOCRConfig.properties
- String line = "convert -density " + config.getDensity() + " -depth " + config.getDepth() + " -colorspace " + config.getColorspace() + " -filter " + config.getFilter() + " -resize " + config.getResize() + "% -rotate "+ angle + " " + streamingObject.getAbsolutePath() + " " + streamingObject.getAbsolutePath();
+ String line = "convert -density " + config.getDensity() + " -depth " + config.getDepth() +
+ " -colorspace " + config.getColorspace() + " -filter " + config.getFilter() +
+ " -resize " + config.getResize() + "% -rotate "+ angle + " " + streamingObject.getAbsolutePath() +
+ " " + streamingObject.getAbsolutePath();
cmdLine = CommandLine.parse(line);
try {
executor.execute(cmdLine);