You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2019/12/02 19:03:08 UTC
[tika] branch branch_1x updated: TIKA-2630: Wrong height and width
metadata for JPEG images (#255)
This is an automated email from the ASF dual-hosted git repository.
tallison pushed a commit to branch branch_1x
in repository https://gitbox.apache.org/repos/asf/tika.git
The following commit(s) were added to refs/heads/branch_1x by this push:
new 90880e1 TIKA-2630: Wrong height and width metadata for JPEG images (#255)
90880e1 is described below
commit 90880e179f98c06ca948b8fed65e902f5b520e6b
Author: Dave Meikle <dm...@apache.org>
AuthorDate: Mon Dec 2 19:03:00 2019 +0000
TIKA-2630: Wrong height and width metadata for JPEG images (#255)
* TIKA-2630:
- Added extraction of image height/width from ExifSubIFDDirectory for compressed images
- Include directory name as key qualifier for Exif directories to avoid clashes
* TIKA-2630: Tidied up code
---
.../tika/parser/image/ImageMetadataExtractor.java | 23 ++++++++++++++++++++--
.../apache/tika/parser/jpeg/JpegParserTest.java | 10 +++++-----
.../tika/parser/ocr/TesseractOCRParserTest.java | 2 +-
.../org/apache/tika/parser/rtf/RTFParserTest.java | 4 ++--
4 files changed, 29 insertions(+), 10 deletions(-)
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/image/ImageMetadataExtractor.java b/tika-parsers/src/main/java/org/apache/tika/parser/image/ImageMetadataExtractor.java
index 18ab8f1..9fec322 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/image/ImageMetadataExtractor.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/image/ImageMetadataExtractor.java
@@ -260,7 +260,11 @@ public class ImageMetadataExtractor {
throws MetadataException {
if (directory.getTags() != null) {
for (Tag tag : directory.getTags()) {
- metadata.set(tag.getTagName(), tag.getDescription());
+ if (directory instanceof ExifDirectoryBase) {
+ metadata.set(directory.getName() + ":" + tag.getTagName(), tag.getDescription());
+ } else {
+ metadata.set(tag.getTagName(), tag.getDescription());
+ }
}
}
}
@@ -288,7 +292,11 @@ public class ImageMetadataExtractor {
} else if (Boolean.FALSE.toString().equalsIgnoreCase(value)) {
value = Boolean.FALSE.toString();
}
- metadata.set(name, value);
+ if (directory instanceof ExifDirectoryBase) {
+ metadata.set(directory.getName() + ":" + name, value);
+ } else {
+ metadata.set(name, value);
+ }
}
}
}
@@ -493,6 +501,17 @@ public class ImageMetadataExtractor {
metadata.set(Metadata.IMAGE_LENGTH,
trimPixels(directory.getDescription(ExifThumbnailDirectory.TAG_IMAGE_HEIGHT)));
}
+
+ // For Compressed Images read from ExifSubIFDDirectory
+ if (directory.containsTag(ExifSubIFDDirectory.TAG_EXIF_IMAGE_WIDTH)) {
+ metadata.set(Metadata.IMAGE_WIDTH,
+ trimPixels(directory.getDescription(ExifSubIFDDirectory.TAG_EXIF_IMAGE_WIDTH)));
+ }
+ if (directory.containsTag(ExifSubIFDDirectory.TAG_EXIF_IMAGE_HEIGHT)) {
+ metadata.set(Metadata.IMAGE_LENGTH,
+ trimPixels(directory.getDescription(ExifSubIFDDirectory.TAG_EXIF_IMAGE_HEIGHT)));
+ }
+
}
/**
diff --git a/tika-parsers/src/test/java/org/apache/tika/parser/jpeg/JpegParserTest.java b/tika-parsers/src/test/java/org/apache/tika/parser/jpeg/JpegParserTest.java
index d32dfc4..dd0d234 100644
--- a/tika-parsers/src/test/java/org/apache/tika/parser/jpeg/JpegParserTest.java
+++ b/tika-parsers/src/test/java/org/apache/tika/parser/jpeg/JpegParserTest.java
@@ -65,8 +65,8 @@ public class JpegParserTest {
parser.parse(stream, new DefaultHandler(), metadata, new ParseContext());
// Core EXIF/TIFF tags
- assertEquals("100", metadata.get(Metadata.IMAGE_WIDTH));
- assertEquals("68", metadata.get(Metadata.IMAGE_LENGTH));
+ assertEquals("3888", metadata.get(Metadata.IMAGE_WIDTH));
+ assertEquals("2592", metadata.get(Metadata.IMAGE_LENGTH));
assertEquals("8", metadata.get(Metadata.BITS_PER_SAMPLE));
assertEquals(null, metadata.get(Metadata.SAMPLES_PER_PIXEL));
@@ -86,7 +86,7 @@ public class JpegParserTest {
// Check that EXIF/TIFF tags come through with their raw values too
// (This may be removed for Tika 1.0, as we support more of them
// with explicit Metadata entries)
- assertEquals("Canon EOS 40D", metadata.get("Model"));
+ assertEquals("Canon EOS 40D", metadata.get("Exif IFD0:Model"));
// Common tags
assertEquals("2009-10-02T23:02:49", metadata.get(Metadata.LAST_MODIFIED));
@@ -120,8 +120,8 @@ public class JpegParserTest {
assertEquals("-54.1234", metadata.get(Metadata.LONGITUDE));
// Core EXIF/TIFF tags
- assertEquals("100", metadata.get(Metadata.IMAGE_WIDTH));
- assertEquals("68", metadata.get(Metadata.IMAGE_LENGTH));
+ assertEquals("3888", metadata.get(Metadata.IMAGE_WIDTH));
+ assertEquals("2592", metadata.get(Metadata.IMAGE_LENGTH));
assertEquals("8", metadata.get(Metadata.BITS_PER_SAMPLE));
assertEquals(null, metadata.get(Metadata.SAMPLES_PER_PIXEL));
diff --git a/tika-parsers/src/test/java/org/apache/tika/parser/ocr/TesseractOCRParserTest.java b/tika-parsers/src/test/java/org/apache/tika/parser/ocr/TesseractOCRParserTest.java
index bb87262..b9b9504 100644
--- a/tika-parsers/src/test/java/org/apache/tika/parser/ocr/TesseractOCRParserTest.java
+++ b/tika-parsers/src/test/java/org/apache/tika/parser/ocr/TesseractOCRParserTest.java
@@ -255,7 +255,7 @@ public class TesseractOCRParserTest extends TikaTest {
m = getXML("testTIFF.tif").metadata;
assertEquals("100", m.get(Metadata.IMAGE_WIDTH));
assertEquals("75", m.get(Metadata.IMAGE_LENGTH));
- assertEquals("72 dots per inch", m.get("Y Resolution"));
+ assertEquals("72 dots per inch", m.get("Exif IFD0:Y Resolution"));
}
//TODO: add unit tests for jp2/jpx/ppm TIKA-2174
diff --git a/tika-parsers/src/test/java/org/apache/tika/parser/rtf/RTFParserTest.java b/tika-parsers/src/test/java/org/apache/tika/parser/rtf/RTFParserTest.java
index 3a22bdd..f19cab0 100644
--- a/tika-parsers/src/test/java/org/apache/tika/parser/rtf/RTFParserTest.java
+++ b/tika-parsers/src/test/java/org/apache/tika/parser/rtf/RTFParserTest.java
@@ -468,8 +468,8 @@ public class RTFParserTest extends TikaTest {
assertEquals("false", meta_jpg.get(RTFMetadata.THUMBNAIL));
assertEquals("false", meta_jpg_exif.get(RTFMetadata.THUMBNAIL));
- assertEquals(51, meta_jpg.names().length);
- assertEquals(115, meta_jpg_exif.names().length);
+ assertEquals(50, meta_jpg.names().length);
+ assertEquals(116, meta_jpg_exif.names().length);
}
@Test