You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by tp...@apache.org on 2015/03/15 06:13:07 UTC
svn commit: r1666779 -
/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/image/ImageMetadataExtractor.java
Author: tpalsulich
Date: Sun Mar 15 05:13:06 2015
New Revision: 1666779
URL: http://svn.apache.org/r1666779
Log:
Pure whitespace change. Reformat ImageMetadataExtractor.
Modified:
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/image/ImageMetadataExtractor.java
Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/image/ImageMetadataExtractor.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/image/ImageMetadataExtractor.java?rev=1666779&r1=1666778&r2=1666779&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/image/ImageMetadataExtractor.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/image/ImageMetadataExtractor.java Sun Mar 15 05:13:06 2015
@@ -28,8 +28,6 @@ import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
-import com.drew.imaging.jpeg.JpegSegmentType;
-import com.drew.imaging.tiff.TiffProcessingException;
import org.apache.poi.util.IOUtils;
import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.IPTC;
@@ -40,8 +38,9 @@ import org.xml.sax.SAXException;
import com.drew.imaging.jpeg.JpegMetadataReader;
import com.drew.imaging.jpeg.JpegProcessingException;
+import com.drew.imaging.jpeg.JpegSegmentType;
import com.drew.imaging.tiff.TiffMetadataReader;
-import com.drew.lang.ByteArrayReader;
+import com.drew.imaging.tiff.TiffProcessingException;
import com.drew.lang.GeoLocation;
import com.drew.lang.Rational;
import com.drew.metadata.Directory;
@@ -60,7 +59,7 @@ import com.drew.metadata.xmp.XmpReader;
/**
* Uses the <a href="http://www.drewnoakes.com/code/exif/">Metadata Extractor</a> library
* to read EXIF and IPTC image metadata and map to Tika fields.
- *
+ * <p/>
* As of 2.4.0 the library supports jpeg and tiff.
*/
public class ImageMetadataExtractor {
@@ -74,15 +73,15 @@ public class ImageMetadataExtractor {
*/
public ImageMetadataExtractor(Metadata metadata) {
this(metadata,
- new CopyUnknownFieldsHandler(),
- new JpegCommentHandler(),
- new ExifHandler(),
- new DimensionsHandler(),
- new GeotagHandler(),
- new IptcHandler()
+ new CopyUnknownFieldsHandler(),
+ new JpegCommentHandler(),
+ new ExifHandler(),
+ new DimensionsHandler(),
+ new GeotagHandler(),
+ new IptcHandler()
);
}
-
+
/**
* @param metadata to extract to
* @param handlers handlers in order, note that handlers may override values from earlier handlers
@@ -115,16 +114,16 @@ public class ImageMetadataExtractor {
throw new TikaException("Can't read TIFF metadata", e);
}
}
-
+
public void parseRawExif(InputStream stream, int length, boolean needsExifHeader)
throws IOException, SAXException, TikaException {
byte[] exif;
if (needsExifHeader) {
- exif = new byte[length+6];
- exif[0] = (byte)'E';
- exif[1] = (byte)'x';
- exif[2] = (byte)'i';
- exif[3] = (byte)'f';
+ exif = new byte[length + 6];
+ exif[0] = (byte) 'E';
+ exif[1] = (byte) 'x';
+ exif[2] = (byte) 'i';
+ exif[3] = (byte) 'f';
IOUtils.readFully(stream, exif, 6, length);
} else {
exif = new byte[length];
@@ -132,24 +131,26 @@ public class ImageMetadataExtractor {
}
parseRawExif(exif);
}
+
public void parseRawExif(byte[] exifData)
throws IOException, SAXException, TikaException {
com.drew.metadata.Metadata metadata = new com.drew.metadata.Metadata();
ExifReader reader = new ExifReader();
reader.extract(exifData, metadata, JpegSegmentType.APP1);
-
+
try {
handle(metadata);
} catch (MetadataException e) {
throw new TikaException("Can't process the EXIF Data", e);
}
}
+
public void parseRawXMP(byte[] xmpData)
throws IOException, SAXException, TikaException {
com.drew.metadata.Metadata metadata = new com.drew.metadata.Metadata();
XmpReader reader = new XmpReader();
reader.extract(xmpData, metadata);
-
+
try {
handle(metadata);
} catch (MetadataException e) {
@@ -159,19 +160,21 @@ public class ImageMetadataExtractor {
/**
* Copies extracted tags to tika metadata using registered handlers.
+ *
* @param metadataExtractor Tag directories from a Metadata Extractor "reader"
* @throws MetadataException This method does not handle exceptions from Metadata Extractor
*/
- protected void handle(com.drew.metadata.Metadata metadataExtractor)
+ protected void handle(com.drew.metadata.Metadata metadataExtractor)
throws MetadataException {
handle(metadataExtractor.getDirectories().iterator());
}
/**
* Copies extracted tags to tika metadata using registered handlers.
+ *
* @param directories Metadata Extractor {@link com.drew.metadata.Directory} instances.
* @throws MetadataException This method does not handle exceptions from Metadata Extractor
- */
+ */
protected void handle(Iterator<Directory> directories) throws MetadataException {
while (directories.hasNext()) {
Directory directory = directories.next();
@@ -192,12 +195,13 @@ public class ImageMetadataExtractor {
* @return true if the directory type is supported by this handler
*/
boolean supports(Class<? extends Directory> directoryType);
+
/**
* @param directory extracted tags
- * @param metadata current tika metadata
+ * @param metadata current tika metadata
* @throws MetadataException typically field extraction error, aborts all further extraction
*/
- void handle(Directory directory, Metadata metadata)
+ void handle(Directory directory, Metadata metadata)
throws MetadataException;
}
@@ -209,6 +213,7 @@ public class ImageMetadataExtractor {
public boolean supports(Class<? extends Directory> directoryType) {
return true;
}
+
public void handle(Directory directory, Metadata metadata)
throws MetadataException {
if (directory.getTags() != null) {
@@ -217,8 +222,8 @@ public class ImageMetadataExtractor {
}
}
}
- }
-
+ }
+
/**
* Copies all fields regardless of directory, if the tag name
* is not identical to a known Metadata field name.
@@ -228,36 +233,39 @@ public class ImageMetadataExtractor {
public boolean supports(Class<? extends Directory> directoryType) {
return true;
}
+
public void handle(Directory directory, Metadata metadata)
throws MetadataException {
if (directory.getTags() != null) {
for (Tag tag : directory.getTags()) {
String name = tag.getTagName();
if (!MetadataFields.isMetadataField(name) && tag.getDescription() != null) {
- String value = tag.getDescription().trim();
- if (Boolean.TRUE.toString().equalsIgnoreCase(value)) {
- value = Boolean.TRUE.toString();
- } else if (Boolean.FALSE.toString().equalsIgnoreCase(value)) {
- value = Boolean.FALSE.toString();
- }
- metadata.set(name, value);
+ String value = tag.getDescription().trim();
+ if (Boolean.TRUE.toString().equalsIgnoreCase(value)) {
+ value = Boolean.TRUE.toString();
+ } else if (Boolean.FALSE.toString().equalsIgnoreCase(value)) {
+ value = Boolean.FALSE.toString();
+ }
+ metadata.set(name, value);
}
}
}
}
}
-
+
/**
* Basic image properties for TIFF and JPEG, at least.
*/
static class DimensionsHandler implements DirectoryHandler {
private final Pattern LEADING_NUMBERS = Pattern.compile("(\\d+)\\s*.*");
+
public boolean supports(Class<? extends Directory> directoryType) {
- return directoryType == JpegDirectory.class ||
- directoryType == ExifSubIFDDirectory.class ||
- directoryType == ExifThumbnailDirectory.class ||
- directoryType == ExifIFD0Directory.class;
+ return directoryType == JpegDirectory.class ||
+ directoryType == ExifSubIFDDirectory.class ||
+ directoryType == ExifThumbnailDirectory.class ||
+ directoryType == ExifIFD0Directory.class;
}
+
public void handle(Directory directory, Metadata metadata) throws MetadataException {
// The test TIFF has width and height stored as follows according to exiv2
//Exif.Image.ImageWidth Short 1 100
@@ -273,41 +281,43 @@ public class ImageMetadataExtractor {
// Straightforward
set(directory, metadata, ExifSubIFDDirectory.TAG_SAMPLES_PER_PIXEL, Metadata.SAMPLES_PER_PIXEL);
}
+
private void set(Directory directory, Metadata metadata, int extractTag, Property metadataField) {
if (directory.containsTag(extractTag)) {
Matcher m = LEADING_NUMBERS.matcher(directory.getString(extractTag));
- if(m.matches()) {
+ if (m.matches()) {
metadata.set(metadataField, m.group(1));
}
}
}
}
-
+
static class JpegCommentHandler implements DirectoryHandler {
public boolean supports(Class<? extends Directory> directoryType) {
return directoryType == JpegCommentDirectory.class;
}
+
public void handle(Directory directory, Metadata metadata) throws MetadataException {
if (directory.containsTag(JpegCommentDirectory.TAG_COMMENT)) {
metadata.add(TikaCoreProperties.COMMENTS, directory.getString(JpegCommentDirectory.TAG_COMMENT));
}
}
}
-
+
static class ExifHandler implements DirectoryHandler {
// There's a new ExifHandler for each file processed, so this is thread safe
- private static final ThreadLocal<SimpleDateFormat> DATE_UNSPECIFIED_TZ = new ThreadLocal<SimpleDateFormat>(){
+ private static final ThreadLocal<SimpleDateFormat> DATE_UNSPECIFIED_TZ = new ThreadLocal<SimpleDateFormat>() {
@Override
- protected SimpleDateFormat initialValue()
- {
- return new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss", Locale.US);
+ protected SimpleDateFormat initialValue() {
+ return new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss", Locale.US);
}
};
public boolean supports(Class<? extends Directory> directoryType) {
- return directoryType == ExifIFD0Directory.class ||
+ return directoryType == ExifIFD0Directory.class ||
directoryType == ExifSubIFDDirectory.class;
}
+
public void handle(Directory directory, Metadata metadata) {
try {
handleDateTags(directory, metadata);
@@ -317,6 +327,7 @@ public class ImageMetadataExtractor {
// ignore date parse errors and proceed with other tags
}
}
+
/**
* EXIF may contain image description, although with undefined encoding.
* Use IPTC for other annotation fields, and XMP for unicode support.
@@ -324,105 +335,105 @@ public class ImageMetadataExtractor {
public void handleCommentTags(Directory directory, Metadata metadata) {
if (metadata.get(TikaCoreProperties.DESCRIPTION) == null &&
directory.containsTag(ExifIFD0Directory.TAG_IMAGE_DESCRIPTION)) {
- metadata.set(TikaCoreProperties.DESCRIPTION,
+ metadata.set(TikaCoreProperties.DESCRIPTION,
directory.getString(ExifIFD0Directory.TAG_IMAGE_DESCRIPTION));
}
}
+
/**
* Maps common TIFF and EXIF tags onto the Tika
- * TIFF image metadata namespace.
- */
+ * TIFF image metadata namespace.
+ */
public void handlePhotoTags(Directory directory, Metadata metadata) {
- if(directory.containsTag(ExifSubIFDDirectory.TAG_EXPOSURE_TIME)) {
- Object exposure = directory.getObject(ExifSubIFDDirectory.TAG_EXPOSURE_TIME);
- if(exposure instanceof Rational) {
- metadata.set(Metadata.EXPOSURE_TIME, ((Rational)exposure).doubleValue());
- } else {
- metadata.set(Metadata.EXPOSURE_TIME, directory.getString(ExifSubIFDDirectory.TAG_EXPOSURE_TIME));
- }
- }
-
- if(directory.containsTag(ExifSubIFDDirectory.TAG_FLASH)) {
- String flash = directory.getDescription(ExifSubIFDDirectory.TAG_FLASH);
- if(flash.contains("Flash fired")) {
- metadata.set(Metadata.FLASH_FIRED, Boolean.TRUE.toString());
- }
- else if(flash.contains("Flash did not fire")) {
- metadata.set(Metadata.FLASH_FIRED, Boolean.FALSE.toString());
- }
- else {
- metadata.set(Metadata.FLASH_FIRED, flash);
- }
- }
-
- if(directory.containsTag(ExifSubIFDDirectory.TAG_FNUMBER)) {
- Object fnumber = directory.getObject(ExifSubIFDDirectory.TAG_FNUMBER);
- if(fnumber instanceof Rational) {
- metadata.set(Metadata.F_NUMBER, ((Rational)fnumber).doubleValue());
- } else {
- metadata.set(Metadata.F_NUMBER, directory.getString(ExifSubIFDDirectory.TAG_FNUMBER));
- }
- }
-
- if(directory.containsTag(ExifSubIFDDirectory.TAG_FOCAL_LENGTH)) {
- Object length = directory.getObject(ExifSubIFDDirectory.TAG_FOCAL_LENGTH);
- if(length instanceof Rational) {
- metadata.set(Metadata.FOCAL_LENGTH, ((Rational)length).doubleValue());
- } else {
- metadata.set(Metadata.FOCAL_LENGTH, directory.getString(ExifSubIFDDirectory.TAG_FOCAL_LENGTH));
- }
- }
-
- if(directory.containsTag(ExifSubIFDDirectory.TAG_ISO_EQUIVALENT)) {
- metadata.set(Metadata.ISO_SPEED_RATINGS, directory.getString(ExifSubIFDDirectory.TAG_ISO_EQUIVALENT));
- }
-
- if(directory.containsTag(ExifIFD0Directory.TAG_MAKE)) {
- metadata.set(Metadata.EQUIPMENT_MAKE, directory.getString(ExifIFD0Directory.TAG_MAKE));
- }
- if(directory.containsTag(ExifIFD0Directory.TAG_MODEL)) {
- metadata.set(Metadata.EQUIPMENT_MODEL, directory.getString(ExifIFD0Directory.TAG_MODEL));
- }
-
- if(directory.containsTag(ExifIFD0Directory.TAG_ORIENTATION)) {
- Object length = directory.getObject(ExifIFD0Directory.TAG_ORIENTATION);
- if(length instanceof Integer) {
- metadata.set(Metadata.ORIENTATION, Integer.toString((Integer)length));
- } else {
- metadata.set(Metadata.ORIENTATION, directory.getString(ExifIFD0Directory.TAG_ORIENTATION));
- }
- }
-
- if(directory.containsTag(ExifIFD0Directory.TAG_SOFTWARE)) {
- metadata.set(Metadata.SOFTWARE, directory.getString(ExifIFD0Directory.TAG_SOFTWARE));
- }
-
- if(directory.containsTag(ExifIFD0Directory.TAG_X_RESOLUTION)) {
- Object resolution = directory.getObject(ExifIFD0Directory.TAG_X_RESOLUTION);
- if(resolution instanceof Rational) {
- metadata.set(Metadata.RESOLUTION_HORIZONTAL, ((Rational)resolution).doubleValue());
- } else {
- metadata.set(Metadata.RESOLUTION_HORIZONTAL, directory.getString(ExifIFD0Directory.TAG_X_RESOLUTION));
- }
- }
- if(directory.containsTag(ExifIFD0Directory.TAG_Y_RESOLUTION)) {
- Object resolution = directory.getObject(ExifIFD0Directory.TAG_Y_RESOLUTION);
- if(resolution instanceof Rational) {
- metadata.set(Metadata.RESOLUTION_VERTICAL, ((Rational)resolution).doubleValue());
- } else {
- metadata.set(Metadata.RESOLUTION_VERTICAL, directory.getString(ExifIFD0Directory.TAG_Y_RESOLUTION));
- }
+ if (directory.containsTag(ExifSubIFDDirectory.TAG_EXPOSURE_TIME)) {
+ Object exposure = directory.getObject(ExifSubIFDDirectory.TAG_EXPOSURE_TIME);
+ if (exposure instanceof Rational) {
+ metadata.set(Metadata.EXPOSURE_TIME, ((Rational) exposure).doubleValue());
+ } else {
+ metadata.set(Metadata.EXPOSURE_TIME, directory.getString(ExifSubIFDDirectory.TAG_EXPOSURE_TIME));
+ }
}
- if(directory.containsTag(ExifIFD0Directory.TAG_RESOLUTION_UNIT)) {
- metadata.set(Metadata.RESOLUTION_UNIT, directory.getDescription(ExifIFD0Directory.TAG_RESOLUTION_UNIT));
+
+ if (directory.containsTag(ExifSubIFDDirectory.TAG_FLASH)) {
+ String flash = directory.getDescription(ExifSubIFDDirectory.TAG_FLASH);
+ if (flash.contains("Flash fired")) {
+ metadata.set(Metadata.FLASH_FIRED, Boolean.TRUE.toString());
+ } else if (flash.contains("Flash did not fire")) {
+ metadata.set(Metadata.FLASH_FIRED, Boolean.FALSE.toString());
+ } else {
+ metadata.set(Metadata.FLASH_FIRED, flash);
+ }
+ }
+
+ if (directory.containsTag(ExifSubIFDDirectory.TAG_FNUMBER)) {
+ Object fnumber = directory.getObject(ExifSubIFDDirectory.TAG_FNUMBER);
+ if (fnumber instanceof Rational) {
+ metadata.set(Metadata.F_NUMBER, ((Rational) fnumber).doubleValue());
+ } else {
+ metadata.set(Metadata.F_NUMBER, directory.getString(ExifSubIFDDirectory.TAG_FNUMBER));
+ }
+ }
+
+ if (directory.containsTag(ExifSubIFDDirectory.TAG_FOCAL_LENGTH)) {
+ Object length = directory.getObject(ExifSubIFDDirectory.TAG_FOCAL_LENGTH);
+ if (length instanceof Rational) {
+ metadata.set(Metadata.FOCAL_LENGTH, ((Rational) length).doubleValue());
+ } else {
+ metadata.set(Metadata.FOCAL_LENGTH, directory.getString(ExifSubIFDDirectory.TAG_FOCAL_LENGTH));
+ }
+ }
+
+ if (directory.containsTag(ExifSubIFDDirectory.TAG_ISO_EQUIVALENT)) {
+ metadata.set(Metadata.ISO_SPEED_RATINGS, directory.getString(ExifSubIFDDirectory.TAG_ISO_EQUIVALENT));
+ }
+
+ if (directory.containsTag(ExifIFD0Directory.TAG_MAKE)) {
+ metadata.set(Metadata.EQUIPMENT_MAKE, directory.getString(ExifIFD0Directory.TAG_MAKE));
+ }
+ if (directory.containsTag(ExifIFD0Directory.TAG_MODEL)) {
+ metadata.set(Metadata.EQUIPMENT_MODEL, directory.getString(ExifIFD0Directory.TAG_MODEL));
+ }
+
+ if (directory.containsTag(ExifIFD0Directory.TAG_ORIENTATION)) {
+ Object length = directory.getObject(ExifIFD0Directory.TAG_ORIENTATION);
+ if (length instanceof Integer) {
+ metadata.set(Metadata.ORIENTATION, Integer.toString((Integer) length));
+ } else {
+ metadata.set(Metadata.ORIENTATION, directory.getString(ExifIFD0Directory.TAG_ORIENTATION));
+ }
+ }
+
+ if (directory.containsTag(ExifIFD0Directory.TAG_SOFTWARE)) {
+ metadata.set(Metadata.SOFTWARE, directory.getString(ExifIFD0Directory.TAG_SOFTWARE));
+ }
+
+ if (directory.containsTag(ExifIFD0Directory.TAG_X_RESOLUTION)) {
+ Object resolution = directory.getObject(ExifIFD0Directory.TAG_X_RESOLUTION);
+ if (resolution instanceof Rational) {
+ metadata.set(Metadata.RESOLUTION_HORIZONTAL, ((Rational) resolution).doubleValue());
+ } else {
+ metadata.set(Metadata.RESOLUTION_HORIZONTAL, directory.getString(ExifIFD0Directory.TAG_X_RESOLUTION));
+ }
}
- if(directory.containsTag(ExifThumbnailDirectory.TAG_THUMBNAIL_IMAGE_WIDTH)) {
+ if (directory.containsTag(ExifIFD0Directory.TAG_Y_RESOLUTION)) {
+ Object resolution = directory.getObject(ExifIFD0Directory.TAG_Y_RESOLUTION);
+ if (resolution instanceof Rational) {
+ metadata.set(Metadata.RESOLUTION_VERTICAL, ((Rational) resolution).doubleValue());
+ } else {
+ metadata.set(Metadata.RESOLUTION_VERTICAL, directory.getString(ExifIFD0Directory.TAG_Y_RESOLUTION));
+ }
+ }
+ if (directory.containsTag(ExifIFD0Directory.TAG_RESOLUTION_UNIT)) {
+ metadata.set(Metadata.RESOLUTION_UNIT, directory.getDescription(ExifIFD0Directory.TAG_RESOLUTION_UNIT));
+ }
+ if (directory.containsTag(ExifThumbnailDirectory.TAG_THUMBNAIL_IMAGE_WIDTH)) {
metadata.set(Metadata.IMAGE_WIDTH, directory.getDescription(ExifThumbnailDirectory.TAG_THUMBNAIL_IMAGE_WIDTH));
}
- if(directory.containsTag(ExifThumbnailDirectory.TAG_THUMBNAIL_IMAGE_HEIGHT)) {
+ if (directory.containsTag(ExifThumbnailDirectory.TAG_THUMBNAIL_IMAGE_HEIGHT)) {
metadata.set(Metadata.IMAGE_LENGTH, directory.getDescription(ExifThumbnailDirectory.TAG_THUMBNAIL_IMAGE_HEIGHT));
}
}
+
/**
* Maps exif dates to metadata fields.
*/
@@ -435,7 +446,7 @@ public class ImageMetadataExtractor {
// Unless we have GPS time we don't know the time zone so date must be set
// as ISO 8601 datetime without timezone suffix (no Z or +/-)
if (original != null) {
- String datetimeNoTimeZone = DATE_UNSPECIFIED_TZ.get().format(original); // Same time zone as Metadata Extractor uses
+ String datetimeNoTimeZone = DATE_UNSPECIFIED_TZ.get().format(original); // Same time zone as Metadata Extractor uses
metadata.set(TikaCoreProperties.CREATED, datetimeNoTimeZone);
metadata.set(Metadata.ORIGINAL_DATE, datetimeNoTimeZone);
}
@@ -443,7 +454,7 @@ public class ImageMetadataExtractor {
if (directory.containsTag(ExifIFD0Directory.TAG_DATETIME)) {
Date datetime = directory.getDate(ExifIFD0Directory.TAG_DATETIME);
if (datetime != null) {
- String datetimeNoTimeZone = DATE_UNSPECIFIED_TZ.get().format(datetime);
+ String datetimeNoTimeZone = DATE_UNSPECIFIED_TZ.get().format(datetime);
metadata.set(TikaCoreProperties.MODIFIED, datetimeNoTimeZone);
// If Date/Time Original does not exist this might be creation date
if (metadata.get(TikaCoreProperties.CREATED) == null) {
@@ -453,7 +464,7 @@ public class ImageMetadataExtractor {
}
}
}
-
+
/**
* Reads image comments, originally TIKA-472.
* Metadata Extractor does not read XMP so we need to use the values from Iptc or EXIF
@@ -462,6 +473,7 @@ public class ImageMetadataExtractor {
public boolean supports(Class<? extends Directory> directoryType) {
return directoryType == IptcDirectory.class;
}
+
public void handle(Directory directory, Metadata metadata)
throws MetadataException {
if (directory.containsTag(IptcDirectory.TAG_KEYWORDS)) {
@@ -495,6 +507,7 @@ public class ImageMetadataExtractor {
public boolean supports(Class<? extends Directory> directoryType) {
return directoryType == GpsDirectory.class;
}
+
public void handle(Directory directory, Metadata metadata) throws MetadataException {
GeoLocation geoLocation = ((GpsDirectory) directory).getGeoLocation();
if (geoLocation != null) {