You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ni...@apache.org on 2015/01/07 13:14:19 UTC

svn commit: r1650046 - /tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/ocr/TesseractOCRParserTest.java

Author: nick
Date: Wed Jan  7 12:14:18 2015
New Revision: 1650046

URL: http://svn.apache.org/r1650046
Log:
TIKA-1445 Unit test to show that when an invalid tesseract config is given, and tesseract cannot be found, TesseractOCRParser will return no types and will not be selected by DefaultParser

Modified:
    tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/ocr/TesseractOCRParserTest.java

Modified: tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/ocr/TesseractOCRParserTest.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/ocr/TesseractOCRParserTest.java?rev=1650046&r1=1650045&r2=1650046&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/ocr/TesseractOCRParserTest.java (original)
+++ tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/ocr/TesseractOCRParserTest.java Wed Jan  7 12:14:18 2015
@@ -17,6 +17,7 @@
 package org.apache.tika.parser.ocr;
 
 import static org.apache.tika.parser.ocr.TesseractOCRParser.getTesseractProg;
+import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertTrue;
 import static org.junit.Assume.assumeTrue;
 
@@ -24,10 +25,14 @@ import java.io.InputStream;
 
 import org.apache.tika.TikaTest;
 import org.apache.tika.metadata.Metadata;
+import org.apache.tika.mime.MediaType;
 import org.apache.tika.parser.AutoDetectParser;
+import org.apache.tika.parser.DefaultParser;
 import org.apache.tika.parser.ParseContext;
 import org.apache.tika.parser.Parser;
 import org.apache.tika.parser.external.ExternalParser;
+import org.apache.tika.parser.image.ImageParser;
+import org.apache.tika.parser.jpeg.JpegParser;
 import org.apache.tika.parser.pdf.PDFParserConfig;
 import org.apache.tika.sax.BodyContentHandler;
 import org.junit.Test;
@@ -45,6 +50,38 @@ public class TesseractOCRParserTest exte
         // If Tesseract is not on the path, do not run the test.
         return ExternalParser.check(checkCmd);
     }
+    
+    @Test
+    public void offersNoTypesIfNotFound() throws Exception {
+        TesseractOCRParser parser = new TesseractOCRParser();
+        DefaultParser defaultParser = new DefaultParser();
+        MediaType png = MediaType.image("png");
+        
+        // Point the config at a made-up tesseract path; with it, the parser should offer no types
+        TesseractOCRConfig invalidConfig = new TesseractOCRConfig();
+        invalidConfig.setTesseractPath("/made/up/path");
+        
+        ParseContext parseContext = new ParseContext();
+        parseContext.set(TesseractOCRConfig.class, invalidConfig);
+
+        // No types offered by the parser itself for this context
+        assertEquals(0, parser.getSupportedTypes(parseContext).size());
+        
+        // And DefaultParser won't use us: PNG is expected to map to ImageParser instead
+        assertEquals(ImageParser.class, defaultParser.getParsers(parseContext).get(png).getClass());
+        
+        
+        // With a default config, will offer the usual types (rest of test is skipped if canRun is false)
+        TesseractOCRConfig normalConfig = new TesseractOCRConfig();
+        assumeTrue(canRun(normalConfig));
+        parseContext.set(TesseractOCRConfig.class, normalConfig);
+        
+        assertEquals(5, parser.getSupportedTypes(parseContext).size());
+        assertTrue(parser.getSupportedTypes(parseContext).contains(png));
+        
+        // DefaultParser now will pick TesseractOCRParser for PNG
+        assertEquals(TesseractOCRParser.class, defaultParser.getParsers(parseContext).get(png).getClass());
+    }
 
     @Test
     public void testPDFOCR() throws Exception {