You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2016/11/10 13:06:19 UTC

tika git commit: TIKA-2174 -- add ppm and update changes.txt

Repository: tika
Updated Branches:
  refs/heads/2.x ab009aeb7 -> 3f24e6c3e


TIKA-2174 -- add ppm and update changes.txt


Project: http://git-wip-us.apache.org/repos/asf/tika/repo
Commit: http://git-wip-us.apache.org/repos/asf/tika/commit/3f24e6c3
Tree: http://git-wip-us.apache.org/repos/asf/tika/tree/3f24e6c3
Diff: http://git-wip-us.apache.org/repos/asf/tika/diff/3f24e6c3

Branch: refs/heads/2.x
Commit: 3f24e6c3e2514a7be2d966305c53a3da0f397ef9
Parents: ab009ae
Author: tballison <ta...@mitre.org>
Authored: Thu Nov 10 08:06:13 2016 -0500
Committer: tballison <ta...@mitre.org>
Committed: Thu Nov 10 08:06:13 2016 -0500

----------------------------------------------------------------------
 CHANGES.txt                                              | 11 ++++++++---
 .../org/apache/tika/parser/ocr/TesseractOCRParser.java   |  2 +-
 .../apache/tika/parser/ocr/TesseractOCRParserTest.java   |  2 ++
 3 files changed, 11 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tika/blob/3f24e6c3/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index 8e4fc56..e062673 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -15,10 +15,15 @@ Release 2.0 - ???
       parser classes and their dependencies. To keep the old behaviour,
       set your LoadErrorHandler to IGNORE. (TIKA-1805)
 
-Release 1.15
- * Upgrade provided SQLite parser to 3.15.1 (TIKA-2171).
+Release 1.15 - ???
 
-Release 1.14 - ???
+  * Add extraction of .jpx inline images from PDFs (TIKA-2175).
+
+  * Add .jpx, .jp2, .ppm to formats handled by Tesseract (TIKA-2174).
+
+  * Upgrade provided SQLite parser to 3.15.1 (TIKA-2171).
+
+Release 1.14 - 11/9/2016
 
   * Extract all headers from MSG/RFC822 (TIKA-2122).
 

http://git-wip-us.apache.org/repos/asf/tika/blob/3f24e6c3/tika-parser-modules/tika-parser-multimedia-module/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java
----------------------------------------------------------------------
diff --git a/tika-parser-modules/tika-parser-multimedia-module/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java b/tika-parser-modules/tika-parser-multimedia-module/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java
index a83d419..031e8b9 100644
--- a/tika-parser-modules/tika-parser-multimedia-module/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java
+++ b/tika-parser-modules/tika-parser-multimedia-module/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java
@@ -97,7 +97,7 @@ public class TesseractOCRParser extends AbstractParser {
                     MediaType.image("png"), MediaType.image("jpeg"), MediaType.image("tiff"),
                     MediaType.image("x-ms-bmp"), MediaType.image("gif"),
                     MediaType.APPLICATION_XML.image("jp2"),
-                    MediaType.image("jpx")
+                    MediaType.image("jpx"), MediaType.image("x-portable-pixmap")
             })));
     private static Map<String,Boolean> TESSERACT_PRESENT = new HashMap<String, Boolean>();
 

http://git-wip-us.apache.org/repos/asf/tika/blob/3f24e6c3/tika-parser-modules/tika-parser-multimedia-module/src/test/java/org/apache/tika/parser/ocr/TesseractOCRParserTest.java
----------------------------------------------------------------------
diff --git a/tika-parser-modules/tika-parser-multimedia-module/src/test/java/org/apache/tika/parser/ocr/TesseractOCRParserTest.java b/tika-parser-modules/tika-parser-multimedia-module/src/test/java/org/apache/tika/parser/ocr/TesseractOCRParserTest.java
index 8d7e9a9..ce5531d 100644
--- a/tika-parser-modules/tika-parser-multimedia-module/src/test/java/org/apache/tika/parser/ocr/TesseractOCRParserTest.java
+++ b/tika-parser-modules/tika-parser-multimedia-module/src/test/java/org/apache/tika/parser/ocr/TesseractOCRParserTest.java
@@ -244,4 +244,6 @@ public class TesseractOCRParserTest extends TikaTest {
         assertEquals("75", m.get(Metadata.IMAGE_LENGTH));
         assertEquals("72 dots per inch", m.get("Y Resolution"));
     }
+
+    //TODO: add unit tests for jp2/jpx and ppm
 }