You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2019/12/02 19:33:53 UTC
[tika] 06/06: TIKA-2630: Wrong height and width metadata for JPEG images (#255)
This is an automated email from the ASF dual-hosted git repository.
tallison pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/tika.git
commit 31fe67d7ee016579ad3827c44fe7d2bf61f3efbe
Author: Dave Meikle <dm...@apache.org>
AuthorDate: Mon Dec 2 19:03:00 2019 +0000
TIKA-2630: Wrong height and width metadata for JPEG images (#255)
* TIKA-2630:
- Added extraction of image height/width from ExifSubIFDDirectory for compressed images
- Include directory name as key qualifier for Exif directories to avoid clashes
* TIKA-2630: Tidied up code
# Conflicts:
# tika-parsers/src/test/java/org/apache/tika/parser/rtf/RTFParserTest.java
---
.../tika/parser/image/ImageMetadataExtractor.java | 23 ++++++++++++++++++++--
.../apache/tika/parser/jpeg/JpegParserTest.java | 10 +++++-----
.../tika/parser/ocr/TesseractOCRParserTest.java | 2 +-
.../org/apache/tika/parser/rtf/RTFParserTest.java | 2 +-
4 files changed, 28 insertions(+), 9 deletions(-)
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/image/ImageMetadataExtractor.java b/tika-parsers/src/main/java/org/apache/tika/parser/image/ImageMetadataExtractor.java
index aeb0223..f6670d8 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/image/ImageMetadataExtractor.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/image/ImageMetadataExtractor.java
@@ -260,7 +260,11 @@ public class ImageMetadataExtractor {
throws MetadataException {
if (directory.getTags() != null) {
for (Tag tag : directory.getTags()) {
- metadata.set(tag.getTagName(), tag.getDescription());
+ if (directory instanceof ExifDirectoryBase) {
+ metadata.set(directory.getName() + ":" + tag.getTagName(), tag.getDescription());
+ } else {
+ metadata.set(tag.getTagName(), tag.getDescription());
+ }
}
}
}
@@ -288,7 +292,11 @@ public class ImageMetadataExtractor {
} else if (Boolean.FALSE.toString().equalsIgnoreCase(value)) {
value = Boolean.FALSE.toString();
}
- metadata.set(name, value);
+ if (directory instanceof ExifDirectoryBase) {
+ metadata.set(directory.getName() + ":" + name, value);
+ } else {
+ metadata.set(name, value);
+ }
}
}
}
@@ -493,6 +501,17 @@ public class ImageMetadataExtractor {
metadata.set(Metadata.IMAGE_LENGTH,
trimPixels(directory.getDescription(ExifThumbnailDirectory.TAG_IMAGE_HEIGHT)));
}
+
+ // For Compressed Images read from ExifSubIFDDirectory
+ if (directory.containsTag(ExifSubIFDDirectory.TAG_EXIF_IMAGE_WIDTH)) {
+ metadata.set(Metadata.IMAGE_WIDTH,
+ trimPixels(directory.getDescription(ExifSubIFDDirectory.TAG_EXIF_IMAGE_WIDTH)));
+ }
+ if (directory.containsTag(ExifSubIFDDirectory.TAG_EXIF_IMAGE_WIDTH)) {
+ metadata.set(Metadata.IMAGE_LENGTH,
+ trimPixels(directory.getDescription(ExifSubIFDDirectory.TAG_EXIF_IMAGE_HEIGHT)));
+ }
+
}
/**
diff --git a/tika-parsers/src/test/java/org/apache/tika/parser/jpeg/JpegParserTest.java b/tika-parsers/src/test/java/org/apache/tika/parser/jpeg/JpegParserTest.java
index c710f23..a1339d5 100644
--- a/tika-parsers/src/test/java/org/apache/tika/parser/jpeg/JpegParserTest.java
+++ b/tika-parsers/src/test/java/org/apache/tika/parser/jpeg/JpegParserTest.java
@@ -65,8 +65,8 @@ public class JpegParserTest {
parser.parse(stream, new DefaultHandler(), metadata, new ParseContext());
// Core EXIF/TIFF tags
- assertEquals("100", metadata.get(Metadata.IMAGE_WIDTH));
- assertEquals("68", metadata.get(Metadata.IMAGE_LENGTH));
+ assertEquals("3888", metadata.get(Metadata.IMAGE_WIDTH));
+ assertEquals("2592", metadata.get(Metadata.IMAGE_LENGTH));
assertEquals("8", metadata.get(Metadata.BITS_PER_SAMPLE));
assertEquals(null, metadata.get(Metadata.SAMPLES_PER_PIXEL));
@@ -86,7 +86,7 @@ public class JpegParserTest {
// Check that EXIF/TIFF tags come through with their raw values too
// (This may be removed for Tika 1.0, as we support more of them
// with explicit Metadata entries)
- assertEquals("Canon EOS 40D", metadata.get("Model"));
+ assertEquals("Canon EOS 40D", metadata.get("Exif IFD0:Model"));
// Common tags
assertEquals("2009-10-02T23:02:49", metadata.get(Metadata.LAST_MODIFIED));
@@ -115,8 +115,8 @@ public class JpegParserTest {
assertEquals("-54.1234", metadata.get(Metadata.LONGITUDE));
// Core EXIF/TIFF tags
- assertEquals("100", metadata.get(Metadata.IMAGE_WIDTH));
- assertEquals("68", metadata.get(Metadata.IMAGE_LENGTH));
+ assertEquals("3888", metadata.get(Metadata.IMAGE_WIDTH));
+ assertEquals("2592", metadata.get(Metadata.IMAGE_LENGTH));
assertEquals("8", metadata.get(Metadata.BITS_PER_SAMPLE));
assertEquals(null, metadata.get(Metadata.SAMPLES_PER_PIXEL));
diff --git a/tika-parsers/src/test/java/org/apache/tika/parser/ocr/TesseractOCRParserTest.java b/tika-parsers/src/test/java/org/apache/tika/parser/ocr/TesseractOCRParserTest.java
index 45ef4e2..7e3f01c 100644
--- a/tika-parsers/src/test/java/org/apache/tika/parser/ocr/TesseractOCRParserTest.java
+++ b/tika-parsers/src/test/java/org/apache/tika/parser/ocr/TesseractOCRParserTest.java
@@ -256,7 +256,7 @@ public class TesseractOCRParserTest extends TikaTest {
m = getXML("testTIFF.tif").metadata;
assertEquals("100", m.get(Metadata.IMAGE_WIDTH));
assertEquals("75", m.get(Metadata.IMAGE_LENGTH));
- assertEquals("72 dots per inch", m.get("Y Resolution"));
+ assertEquals("72 dots per inch", m.get("Exif IFD0:Y Resolution"));
}
//TODO: add unit tests for jp2/jpx/ppm TIKA-2174
diff --git a/tika-parsers/src/test/java/org/apache/tika/parser/rtf/RTFParserTest.java b/tika-parsers/src/test/java/org/apache/tika/parser/rtf/RTFParserTest.java
index 27f3b2a..79c5834 100644
--- a/tika-parsers/src/test/java/org/apache/tika/parser/rtf/RTFParserTest.java
+++ b/tika-parsers/src/test/java/org/apache/tika/parser/rtf/RTFParserTest.java
@@ -466,7 +466,7 @@ public class RTFParserTest extends TikaTest {
assertEquals("false", meta_jpg_exif.get(RTFMetadata.THUMBNAIL));
assertEquals(51, meta_jpg.names().length);
- assertEquals(110, meta_jpg_exif.names().length);
+ assertEquals(112, meta_jpg_exif.names().length);
}
@Test