You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2016/11/10 03:05:15 UTC

[2/3] tika git commit: TIKA-2174 add jpx and jp2 to Tesseract

TIKA-2174 add jpx and jp2 to Tesseract


Project: http://git-wip-us.apache.org/repos/asf/tika/repo
Commit: http://git-wip-us.apache.org/repos/asf/tika/commit/f2661f99
Tree: http://git-wip-us.apache.org/repos/asf/tika/tree/f2661f99
Diff: http://git-wip-us.apache.org/repos/asf/tika/diff/f2661f99

Branch: refs/heads/2.x
Commit: f2661f997e69fcaf388561f122b306021928a5d4
Parents: 7422218
Author: tballison <ta...@mitre.org>
Authored: Wed Nov 9 12:51:51 2016 -0500
Committer: tballison <ta...@mitre.org>
Committed: Wed Nov 9 12:51:51 2016 -0500

----------------------------------------------------------------------
 .../main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java | 4 +++-
 .../java/org/apache/tika/parser/ocr/TesseractOCRParserTest.java  | 2 +-
 2 files changed, 4 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tika/blob/f2661f99/tika-parser-modules/tika-parser-multimedia-module/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java
----------------------------------------------------------------------
diff --git a/tika-parser-modules/tika-parser-multimedia-module/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java b/tika-parser-modules/tika-parser-multimedia-module/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java
index 2203a7f..a83d419 100644
--- a/tika-parser-modules/tika-parser-multimedia-module/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java
+++ b/tika-parser-modules/tika-parser-multimedia-module/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java
@@ -95,7 +95,9 @@ public class TesseractOCRParser extends AbstractParser {
     private static final Set<MediaType> SUPPORTED_TYPES = Collections.unmodifiableSet(
             new HashSet<MediaType>(Arrays.asList(new MediaType[] {
                     MediaType.image("png"), MediaType.image("jpeg"), MediaType.image("tiff"),
-                    MediaType.image("x-ms-bmp"), MediaType.image("gif")
+                    MediaType.image("x-ms-bmp"), MediaType.image("gif"),
+                    MediaType.image("jp2"),
+                    MediaType.image("jpx")
             })));
     private static Map<String,Boolean> TESSERACT_PRESENT = new HashMap<String, Boolean>();
 

http://git-wip-us.apache.org/repos/asf/tika/blob/f2661f99/tika-parser-modules/tika-parser-multimedia-module/src/test/java/org/apache/tika/parser/ocr/TesseractOCRParserTest.java
----------------------------------------------------------------------
diff --git a/tika-parser-modules/tika-parser-multimedia-module/src/test/java/org/apache/tika/parser/ocr/TesseractOCRParserTest.java b/tika-parser-modules/tika-parser-multimedia-module/src/test/java/org/apache/tika/parser/ocr/TesseractOCRParserTest.java
index 501364b..8d7e9a9 100644
--- a/tika-parser-modules/tika-parser-multimedia-module/src/test/java/org/apache/tika/parser/ocr/TesseractOCRParserTest.java
+++ b/tika-parser-modules/tika-parser-multimedia-module/src/test/java/org/apache/tika/parser/ocr/TesseractOCRParserTest.java
@@ -91,7 +91,7 @@ public class TesseractOCRParserTest extends TikaTest {
         // Assuming that Tesseract is on the path, the parser should report 7 supported types, including PNG.
         assumeTrue(canRun());
 
-        assertEquals(5, parser.getSupportedTypes(parseContext).size());
+        assertEquals(7, parser.getSupportedTypes(parseContext).size());
         assertTrue(parser.getSupportedTypes(parseContext).contains(png));
 
         // DefaultParser will now select the TesseractOCRParser.