You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by dm...@apache.org on 2014/11/17 18:02:06 UTC
svn commit: r1640189 - in /tika/trunk/tika-parsers/src/main:
java/org/apache/tika/parser/ocr/TesseractOCRConfig.java
resources/org/apache/tika/parser/ocr/
resources/org/apache/tika/parser/ocr/TesseractOCRConfig.properties
Author: dmeikle
Date: Mon Nov 17 17:02:05 2014
New Revision: 1640189
URL: http://svn.apache.org/r1640189
Log:
TIKA-1476: Added default configuration file
Added:
tika/trunk/tika-parsers/src/main/resources/org/apache/tika/parser/ocr/
tika/trunk/tika-parsers/src/main/resources/org/apache/tika/parser/ocr/TesseractOCRConfig.properties (with props)
Modified:
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRConfig.java
Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRConfig.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRConfig.java?rev=1640189&r1=1640188&r2=1640189&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRConfig.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRConfig.java Mon Nov 17 17:02:05 2014
@@ -32,10 +32,9 @@ import java.util.Properties;
* parseContext.set(TesseractOCRConfig.class, config);<br>
* </p>
*
- * Parameters can also be set by creating the TesseractOCRConfig.properties file
- * and placing it in the package org/apache/tika/parser/ocr on the classpath. An
- * example file can be found in the test resources folder:
- * <code>tika-parsers/src/test/resources/test-properties/TesseractOCRConfig-full.properties</code>.
+ * Parameters can also be set by either editing the existing TesseractOCRConfig.properties file in
+ * tika-parsers/src/main/resources/org/apache/tika/parser/ocr, or overriding it by creating your own
+ * and placing it in the package org/apache/tika/parser/ocr on the classpath.
*
*/
public class TesseractOCRConfig implements Serializable{
@@ -206,7 +205,7 @@ public class TesseractOCRConfig implemen
try {
return Integer.parseInt(p);
} catch (Throwable ex) {
- throw new RuntimeException(String.format("Cannot parse TesseractOCRConfig variable $s, invalid integer value", property), ex);
+ throw new RuntimeException(String.format("Cannot parse TesseractOCRConfig variable %s, invalid integer value", property), ex);
}
}
Added: tika/trunk/tika-parsers/src/main/resources/org/apache/tika/parser/ocr/TesseractOCRConfig.properties
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/resources/org/apache/tika/parser/ocr/TesseractOCRConfig.properties?rev=1640189&view=auto
==============================================================================
--- tika/trunk/tika-parsers/src/main/resources/org/apache/tika/parser/ocr/TesseractOCRConfig.properties (added)
+++ tika/trunk/tika-parsers/src/main/resources/org/apache/tika/parser/ocr/TesseractOCRConfig.properties Mon Nov 17 17:02:05 2014
@@ -0,0 +1,21 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+tesseractPath=
+language=eng
+pageSegMode=1
+maxFileSizeToOcr=2147483647
+minFileSizeToOcr=0
+timeout=120
\ No newline at end of file
Propchange: tika/trunk/tika-parsers/src/main/resources/org/apache/tika/parser/ocr/TesseractOCRConfig.properties
------------------------------------------------------------------------------
svn:eol-style = native