You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2016/11/10 03:05:15 UTC
[2/3] tika git commit: TIKA-2174 add jpx and jp2 to Tesseract
TIKA-2174 add jpx and jp2 to Tesseract
Project: http://git-wip-us.apache.org/repos/asf/tika/repo
Commit: http://git-wip-us.apache.org/repos/asf/tika/commit/f2661f99
Tree: http://git-wip-us.apache.org/repos/asf/tika/tree/f2661f99
Diff: http://git-wip-us.apache.org/repos/asf/tika/diff/f2661f99
Branch: refs/heads/2.x
Commit: f2661f997e69fcaf388561f122b306021928a5d4
Parents: 7422218
Author: tballison <ta...@mitre.org>
Authored: Wed Nov 9 12:51:51 2016 -0500
Committer: tballison <ta...@mitre.org>
Committed: Wed Nov 9 12:51:51 2016 -0500
----------------------------------------------------------------------
.../main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java | 4 +++-
.../java/org/apache/tika/parser/ocr/TesseractOCRParserTest.java | 2 +-
2 files changed, 4 insertions(+), 2 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/tika/blob/f2661f99/tika-parser-modules/tika-parser-multimedia-module/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java
----------------------------------------------------------------------
diff --git a/tika-parser-modules/tika-parser-multimedia-module/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java b/tika-parser-modules/tika-parser-multimedia-module/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java
index 2203a7f..a83d419 100644
--- a/tika-parser-modules/tika-parser-multimedia-module/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java
+++ b/tika-parser-modules/tika-parser-multimedia-module/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java
@@ -95,7 +95,9 @@ public class TesseractOCRParser extends AbstractParser {
private static final Set<MediaType> SUPPORTED_TYPES = Collections.unmodifiableSet(
new HashSet<MediaType>(Arrays.asList(new MediaType[] {
MediaType.image("png"), MediaType.image("jpeg"), MediaType.image("tiff"),
- MediaType.image("x-ms-bmp"), MediaType.image("gif")
+ MediaType.image("x-ms-bmp"), MediaType.image("gif"),
+ MediaType.image("jp2"),
+ MediaType.image("jpx")
})));
private static Map<String,Boolean> TESSERACT_PRESENT = new HashMap<String, Boolean>();
http://git-wip-us.apache.org/repos/asf/tika/blob/f2661f99/tika-parser-modules/tika-parser-multimedia-module/src/test/java/org/apache/tika/parser/ocr/TesseractOCRParserTest.java
----------------------------------------------------------------------
diff --git a/tika-parser-modules/tika-parser-multimedia-module/src/test/java/org/apache/tika/parser/ocr/TesseractOCRParserTest.java b/tika-parser-modules/tika-parser-multimedia-module/src/test/java/org/apache/tika/parser/ocr/TesseractOCRParserTest.java
index 501364b..8d7e9a9 100644
--- a/tika-parser-modules/tika-parser-multimedia-module/src/test/java/org/apache/tika/parser/ocr/TesseractOCRParserTest.java
+++ b/tika-parser-modules/tika-parser-multimedia-module/src/test/java/org/apache/tika/parser/ocr/TesseractOCRParserTest.java
@@ -91,7 +91,7 @@ public class TesseractOCRParserTest extends TikaTest {
// Assuming that Tesseract is on the path, we should find 5 Parsers that support PNG.
assumeTrue(canRun());
- assertEquals(5, parser.getSupportedTypes(parseContext).size());
+ assertEquals(7, parser.getSupportedTypes(parseContext).size());
assertTrue(parser.getSupportedTypes(parseContext).contains(png));
// DefaultParser will now select the TesseractOCRParser.