You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by tp...@apache.org on 2015/01/07 19:11:20 UTC
svn commit: r1650133 -
/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/ocr/TesseractOCRParserTest.java
Author: tpalsulich
Date: Wed Jan 7 18:11:20 2015
New Revision: 1650133
URL: http://svn.apache.org/r1650133
Log:
TIKA-1445. Split TesseractOCRParserTest#offersNoTypesIfNotFound in two. Small import and comment changes.
Modified:
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/ocr/TesseractOCRParserTest.java
Modified: tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/ocr/TesseractOCRParserTest.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/ocr/TesseractOCRParserTest.java?rev=1650133&r1=1650132&r2=1650133&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/ocr/TesseractOCRParserTest.java (original)
+++ tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/ocr/TesseractOCRParserTest.java Wed Jan 7 18:11:20 2015
@@ -36,7 +36,6 @@ import org.apache.tika.parser.external.E
import org.apache.tika.parser.image.ImageParser;
import org.apache.tika.parser.pdf.PDFParserConfig;
import org.apache.tika.sax.BasicContentHandlerFactory;
-import org.apache.tika.sax.BodyContentHandler;
import org.junit.Test;
import org.xml.sax.helpers.DefaultHandler;
@@ -54,6 +53,10 @@ public class TesseractOCRParserTest exte
return ExternalParser.check(checkCmd);
}
+ /*
+ Check that if Tesseract is not found, the TesseractOCRParser claims to not support
+ any file types. So, the standard image parser is called instead.
+ */
@Test
public void offersNoTypesIfNotFound() throws Exception {
TesseractOCRParser parser = new TesseractOCRParser();
@@ -72,17 +75,26 @@ public class TesseractOCRParserTest exte
// And DefaultParser won't use us
assertEquals(ImageParser.class, defaultParser.getParsers(parseContext).get(png).getClass());
+ }
+
+ /*
+ If Tesseract is found, test that the parser reports the expected number of supported types.
+ */
+ @Test
+ public void offersTypesIfFound() throws Exception {
+ TesseractOCRParser parser = new TesseractOCRParser();
+ DefaultParser defaultParser = new DefaultParser();
+ ParseContext parseContext = new ParseContext();
+ MediaType png = MediaType.image("png");
- // With a correct path, with offer the usual types
- TesseractOCRConfig normalConfig = new TesseractOCRConfig();
- assumeTrue(canRun(normalConfig));
- parseContext.set(TesseractOCRConfig.class, normalConfig);
+ // Assuming that Tesseract is on the path, the parser should report 5 supported types, including PNG.
+ assumeTrue(canRun());
assertEquals(5, parser.getSupportedTypes(parseContext).size());
assertTrue(parser.getSupportedTypes(parseContext).contains(png));
- // DefaultParser now will
+ // DefaultParser will now select the TesseractOCRParser.
assertEquals(TesseractOCRParser.class, defaultParser.getParsers(parseContext).get(png).getClass());
}
@@ -126,8 +138,7 @@ public class TesseractOCRParserTest exte
parseContext.set(Parser.class, parser);
parseContext.set(PDFParserConfig.class, pdfConfig);
- InputStream stream = TesseractOCRParserTest.class.getResourceAsStream(
- resource);
+ InputStream stream = TesseractOCRParserTest.class.getResourceAsStream(resource);
try {
parser.parse(stream, new DefaultHandler(), new Metadata(), parseContext);
@@ -155,8 +166,7 @@ public class TesseractOCRParserTest exte
@Test
public void testSingleImage() throws Exception {
- TesseractOCRConfig config = new TesseractOCRConfig();
- assumeTrue(canRun(config));
+ assumeTrue(canRun());
String xml = getXML("testOCR.jpg").xml;
assertContains("OCR Testing", xml);
}