You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2020/02/12 17:01:15 UTC

[tika] branch master updated: TIKA-3041 -- keep inline image names the same as they were.

This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/tika.git


The following commit(s) were added to refs/heads/master by this push:
     new 9c855c9  TIKA-3041 -- keep inline image names the same as they were.
9c855c9 is described below

commit 9c855c94f57d0a18352aec319f698b479f394c25
Author: tallison <ta...@apache.org>
AuthorDate: Wed Feb 12 12:00:55 2020 -0500

    TIKA-3041 -- keep inline image names the same as they were.
---
 .../java/org/apache/tika/parser/pdf/ImageGraphicsEngine.java     | 2 +-
 .../src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java  | 9 ++++-----
 2 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/pdf/ImageGraphicsEngine.java b/tika-parsers/src/main/java/org/apache/tika/parser/pdf/ImageGraphicsEngine.java
index 96d8302..b660560 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/pdf/ImageGraphicsEngine.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/pdf/ImageGraphicsEngine.java
@@ -265,7 +265,7 @@ class ImageGraphicsEngine extends PDFGraphicsStreamEngine {
         //this is the metadata for this particular image
         Metadata metadata = new Metadata();
         String suffix = getSuffix(image, metadata);
-        String fileName = "image-" + imageNumber + "." + suffix;
+        String fileName = "image" + imageNumber + "." + suffix;
 
 
         AttributesImpl attr = new AttributesImpl();
diff --git a/tika-parsers/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java b/tika-parsers/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java
index ce8df1b..3717a3f 100644
--- a/tika-parsers/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java
+++ b/tika-parsers/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java
@@ -654,7 +654,7 @@ public class PDFParserTest extends TikaTest {
 
         assertEquals(5, metadatas.size());
         assertNull(metadatas.get(0).get(TikaCoreProperties.RESOURCE_NAME_KEY));
-        assertEquals("image-0.jpg", metadatas.get(1).get(TikaCoreProperties.RESOURCE_NAME_KEY));
+        assertEquals("image0.jpg", metadatas.get(1).get(TikaCoreProperties.RESOURCE_NAME_KEY));
         assertEquals("Press Quality(1).joboptions", metadatas.get(3).get(TikaCoreProperties.RESOURCE_NAME_KEY));
         assertEquals("Unit10.doc", metadatas.get(4).get(TikaCoreProperties.RESOURCE_NAME_KEY));
         assertEquals(MediaType.image("jpeg").toString(), metadatas.get(1).get(Metadata.CONTENT_TYPE));
@@ -674,7 +674,6 @@ public class PDFParserTest extends TikaTest {
 
 
         List<Metadata> metadatas = getRecursiveMetadata("testPDF_JBIG2.pdf", context);
-        debug(metadatas);
         assertEquals(2, metadatas.size());
         assertContains("test images compressed using JBIG2", metadatas.get(0).get(AbstractRecursiveParserWrapperHandler.TIKA_CONTENT));
 
@@ -687,7 +686,7 @@ public class PDFParserTest extends TikaTest {
         assertEquals("Invalid width.", "352", metadatas.get(1).get("width"));
         
         assertNull(metadatas.get(0).get(TikaCoreProperties.RESOURCE_NAME_KEY));
-        assertEquals("image-0.jb2",
+        assertEquals("image0.jb2",
                 metadatas.get(1).get(TikaCoreProperties.RESOURCE_NAME_KEY));
         assertEquals(MediaType.image("x-jbig2").toString(), 
                 metadatas.get(1).get(Metadata.CONTENT_TYPE));
@@ -931,7 +930,7 @@ public class PDFParserTest extends TikaTest {
         //regular attachment
         assertContains("<div source=\"attachment\" class=\"embedded\" id=\"Unit10.doc\" />", r.xml);
         //inline image
-        assertContains("<img src=\"embedded:image-1.tif\" alt=\"image-1.tif\" />", r.xml);
+        assertContains("<img src=\"embedded:image1.tif\" alt=\"image1.tif\" />", r.xml);
 
         //doc embedded inside an annotation
         r = getXML("testPDFFileEmbInAnnotation.pdf");
@@ -1476,7 +1475,7 @@ public class PDFParserTest extends TikaTest {
 
         List<Metadata> metadataList = getRecursiveMetadata("testPDF_PDFBOX-52.pdf", context);
         int max = 0;
-        Matcher matcher = Pattern.compile("image-(\\d+)").matcher("");
+        Matcher matcher = Pattern.compile("image(\\d+)").matcher("");
         for (Metadata m : metadataList) {
             String n = m.get(TikaCoreProperties.RESOURCE_NAME_KEY);