You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by dm...@apache.org on 2014/11/17 18:02:06 UTC

svn commit: r1640189 - in /tika/trunk/tika-parsers/src/main: java/org/apache/tika/parser/ocr/TesseractOCRConfig.java resources/org/apache/tika/parser/ocr/ resources/org/apache/tika/parser/ocr/TesseractOCRConfig.properties

Author: dmeikle
Date: Mon Nov 17 17:02:05 2014
New Revision: 1640189

URL: http://svn.apache.org/r1640189
Log:
TIKA-1476: Added default configuration file

Added:
    tika/trunk/tika-parsers/src/main/resources/org/apache/tika/parser/ocr/
    tika/trunk/tika-parsers/src/main/resources/org/apache/tika/parser/ocr/TesseractOCRConfig.properties   (with props)
Modified:
    tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRConfig.java

Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRConfig.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRConfig.java?rev=1640189&r1=1640188&r2=1640189&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRConfig.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRConfig.java Mon Nov 17 17:02:05 2014
@@ -32,10 +32,9 @@ import java.util.Properties;
  * parseContext.set(TesseractOCRConfig.class, config);<br>
  * </p>
  *
- * Parameters can also be set by creating the TesseractOCRConfig.properties file
- * and placing it in the package org/apache/tika/parser/ocr on the classpath.  An
- * example file can be found in the test resources folder:
- * <code>tika-parsers/src/test/resources/test-properties/TesseractOCRConfig-full.properties</code>.
+ * Parameters can also be set by either editing the existing TesseractOCRConfig.properties file in
+ * tika-parsers/src/main/resources/org/apache/tika/parser/ocr, or overriding it by creating your own
+ * and placing it in the package org/apache/tika/parser/ocr on the classpath.
  * 
  */
 public class TesseractOCRConfig implements Serializable{
@@ -206,7 +205,7 @@ public class TesseractOCRConfig implemen
 		try {
 			return Integer.parseInt(p);
 		} catch (Throwable ex) {
-			throw new RuntimeException(String.format("Cannot parse TesseractOCRConfig variable $s, invalid integer value", property), ex);
+			throw new RuntimeException(String.format("Cannot parse TesseractOCRConfig variable %s, invalid integer value", property), ex);
 		}
 	}
 

Added: tika/trunk/tika-parsers/src/main/resources/org/apache/tika/parser/ocr/TesseractOCRConfig.properties
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/resources/org/apache/tika/parser/ocr/TesseractOCRConfig.properties?rev=1640189&view=auto
==============================================================================
--- tika/trunk/tika-parsers/src/main/resources/org/apache/tika/parser/ocr/TesseractOCRConfig.properties (added)
+++ tika/trunk/tika-parsers/src/main/resources/org/apache/tika/parser/ocr/TesseractOCRConfig.properties Mon Nov 17 17:02:05 2014
@@ -0,0 +1,21 @@
+#  Licensed to the Apache Software Foundation (ASF) under one or more
+#  contributor license agreements.  See the NOTICE file distributed with
+#  this work for additional information regarding copyright ownership.
+#  The ASF licenses this file to You under the Apache License, Version 2.0
+#  (the "License"); you may not use this file except in compliance with
+#  the License.  You may obtain a copy of the License at
+#
+#       http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+
+tesseractPath=
+language=eng
+pageSegMode=1
+maxFileSizeToOcr=2147483647
+minFileSizeToOcr=0
+timeout=120
\ No newline at end of file

Propchange: tika/trunk/tika-parsers/src/main/resources/org/apache/tika/parser/ocr/TesseractOCRConfig.properties
------------------------------------------------------------------------------
    svn:eol-style = native