You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ni...@apache.org on 2010/09/03 18:48:05 UTC
svn commit: r992368 - in /tika/trunk:
tika-core/src/main/java/org/apache/tika/metadata/
tika-parsers/src/main/java/org/apache/tika/parser/image/
tika-parsers/src/test/java/org/apache/tika/parser/jpeg/
Author: nick
Date: Fri Sep 3 16:48:05 2010
New Revision: 992368
URL: http://svn.apache.org/viewvc?rev=992368&view=rev
Log:
Add several more common EXIF tags to the TIFF metadata namespace, and have the EXIF parser also output property-typed tags for these (TIKA-504)
Modified:
tika/trunk/tika-core/src/main/java/org/apache/tika/metadata/Metadata.java
tika/trunk/tika-core/src/main/java/org/apache/tika/metadata/TIFF.java
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/image/ImageMetadataExtractor.java
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/jpeg/JpegParserTest.java
Modified: tika/trunk/tika-core/src/main/java/org/apache/tika/metadata/Metadata.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/metadata/Metadata.java?rev=992368&r1=992367&r2=992368&view=diff
==============================================================================
--- tika/trunk/tika-core/src/main/java/org/apache/tika/metadata/Metadata.java (original)
+++ tika/trunk/tika-core/src/main/java/org/apache/tika/metadata/Metadata.java Fri Sep 3 16:48:05 2010
@@ -311,6 +311,22 @@ public class Metadata implements Creativ
}
/**
+ * Sets the real or rational value of the identified metadata property.
+ *
+ * @since Apache Tika 0.8
+ * @param property simple real or simple rational property definition
+ * @param value property value
+ */
+ public void set(Property property, double value) {
+ if(property.getPropertyType() != Property.PropertyType.SIMPLE)
+ throw new PropertyTypeException(Property.PropertyType.SIMPLE, property.getPropertyType());
+ if(property.getValueType() != Property.ValueType.REAL &&
+ property.getValueType() != Property.ValueType.RATIONAL)
+ throw new PropertyTypeException(Property.ValueType.REAL, property.getValueType());
+ set(property.getName(), Double.toString(value));
+ }
+
+ /**
* Sets the date value of the identified metadata property.
*
* @since Apache Tika 0.8
Modified: tika/trunk/tika-core/src/main/java/org/apache/tika/metadata/TIFF.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/metadata/TIFF.java?rev=992368&r1=992367&r2=992368&view=diff
==============================================================================
--- tika/trunk/tika-core/src/main/java/org/apache/tika/metadata/TIFF.java (original)
+++ tika/trunk/tika-core/src/main/java/org/apache/tika/metadata/TIFF.java Fri Sep 3 16:48:05 2010
@@ -52,6 +52,85 @@ public interface TIFF {
Property.internalInteger("tiff:SamplesPerPixel");
/**
+ * "Exposure time in seconds."
+ */
+ Property EXPOSURE_TIME =
+ Property.internalRational("exif:ExposureTime");
+
+ // TODO "exif:Flash"
+
+ /**
+ * "F-Number."
+ * The f-number is the focal length divided by the "effective" aperture
+ * diameter. It is a dimensionless number that is a measure of lens speed.
+ */
+ Property F_NUMBER =
+ Property.internalRational("exif:FNumber");
+
+ /**
+ * "Focal length of the lens, in millimeters."
+ */
+ Property FOCAL_LENGTH =
+ Property.internalRational("exif:FocalLength");
+
+ /**
+ * "ISO Speed and ISO Latitude of the input device as specified in ISO 12232"
+ */
+ Property ISO_SPEED_RATINGS =
+ Property.internalIntegerSequence("exif:IsoSpeedRatings");
+
+ /**
+ * "Manufacturer of the recording equipment."
+ */
+ Property EQUIPMENT_MAKE =
+ Property.internalText("tiff:Make");
+
+ /**
+ * "Model name or number of the recording equipment."
+ */
+ Property EQUIPMENT_MODEL =
+ Property.internalText("tiff:Model");
+
+ /**
+ * "Software or firmware used to generate the image."
+ */
+ Property SOFTWARE =
+ Property.internalText("tiff:Software");
+
+ /**
+ * "The Orientation of the image."
+ * 1 = 0th row at top, 0th column at left
+ * 2 = 0th row at top, 0th column at right
+ * 3 = 0th row at bottom, 0th column at right
+ * 4 = 0th row at bottom, 0th column at left
+ * 5 = 0th row at left, 0th column at top
+ * 6 = 0th row at right, 0th column at top
+ * 7 = 0th row at right, 0th column at bottom
+ * 8 = 0th row at left, 0th column at bottom
+ */
+ Property ORIENTATION =
+ Property.internalClosedChoise("tiff:Orientation", "1", "2", "3", "4", "5", "6", "7", "8");
+
+ /**
+ * "Horizontal resolution in pixels per unit."
+ */
+ Property RESOLUTION_HORIZONTAL =
+ Property.internalRational("tiff:XResolution");
+
+ /**
+ * "Vertical resolution in pixels per unit."
+ */
+ Property RESOLUTION_VERTICAL =
+ Property.internalRational("tiff:YResolution");
+
+ /**
+ * "Units used for Horizontal and Vertical Resolutions."
+ * One of "Inch" or "cm"
+ */
+ Property RESOLUTION_UNIT =
+ Property.internalClosedChoise("tiff:ResolutionUnit", "Inch", "cm");
+
+ /**
* "Date and time when original image was generated"
*/
Property ORIGINAL_DATE =
Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/image/ImageMetadataExtractor.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/image/ImageMetadataExtractor.java?rev=992368&r1=992367&r2=992368&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/image/ImageMetadataExtractor.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/image/ImageMetadataExtractor.java Fri Sep 3 16:48:05 2010
@@ -20,6 +20,8 @@ import java.io.IOException;
import java.io.InputStream;
import java.text.DecimalFormat;
import java.text.DecimalFormatSymbols;
+import java.text.SimpleDateFormat;
+import java.util.Date;
import java.util.Iterator;
import java.util.Locale;
import java.util.regex.Matcher;
@@ -35,6 +37,7 @@ import com.drew.imaging.jpeg.JpegMetadat
import com.drew.imaging.jpeg.JpegProcessingException;
import com.drew.imaging.tiff.TiffMetadataReader;
import com.drew.imaging.tiff.TiffProcessingException;
+import com.drew.lang.Rational;
import com.drew.metadata.Directory;
import com.drew.metadata.MetadataException;
import com.drew.metadata.Tag;
@@ -80,7 +83,7 @@ public class ImageMetadataExtractor {
while (tags.hasNext()) {
Tag tag = (Tag)tags.next();
metadata.set(tag.getTagName(), tag.getDescription());
- handleCommonImageTags(metadata, tag);
+ handleCommonImageTags(metadata, tag, directory);
}
handleGeoImageTags(metadata);
}
@@ -142,44 +145,139 @@ public class ImageMetadataExtractor {
private static final DecimalFormat LAT_LONG_FORMAT =
new DecimalFormat("##0.0####", new DecimalFormatSymbols(Locale.US));
- private static void handleDate(Metadata metadata, Property property, Tag tag) throws MetadataException {
- // Ensure it's in the right format
- String date = tag.getDescription();
- int splitAt = date.indexOf(' ');
- if(splitAt > -1) {
- String datePart = date.substring(0, splitAt);
- String timePart = date.substring(splitAt+1);
- date = datePart.replace(':', '-') + 'T' + timePart;
- }
- metadata.set(property, date);
+ /**
+ * We normally won't know what timezone our dates belong to
+ */
+ private static final SimpleDateFormat DATE_UNSPECIFIED_TZ =
+ new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss");
+ private static synchronized void handleDate(Property property, Metadata metadata, Tag tag, Directory directory) throws MetadataException {
+ Date date = directory.getDate(tag.getTagType());
+ String dateString = DATE_UNSPECIFIED_TZ.format(date);
+ metadata.set(property, dateString);
}
-
+
/**
* Maps common TIFF and EXIF tags onto the Tika
* TIFF image metadata namespace.
*/
- public static void handleCommonImageTags(Metadata metadata, Tag tag) throws MetadataException {
+ public static void handleCommonImageTags(Metadata metadata, Tag tag, Directory directory) throws MetadataException {
// Core tags
if(tag.getTagName().equals("Date/Time") ||
tag.getTagType() == 306) {
- handleDate(metadata, Metadata.DATE, tag);
- handleDate(metadata, Metadata.LAST_MODIFIED, tag);
+ handleDate(Metadata.DATE, metadata, tag, directory);
+ metadata.set(Metadata.LAST_MODIFIED, metadata.get(Metadata.DATE));
return;
}
if(tag.getTagName().equals("Date/Time Original") ||
- tag.getTagType() == 36867) {
- handleDate(metadata, Metadata.ORIGINAL_DATE, tag);
- return;
- }
+ tag.getTagType() == 36867) {
+ handleDate(Metadata.ORIGINAL_DATE, metadata, tag, directory);
+ return;
+ }
+
+ if(tag.getTagName().equals("Exposure Time") ||
+ tag.getTagType() == 33434) {
+ Object exposure = directory.getObject(tag.getTagType());
+ if(exposure instanceof Rational) {
+ metadata.set(Metadata.EXPOSURE_TIME, ((Rational)exposure).doubleValue());
+ } else {
+ metadata.set(Metadata.EXPOSURE_TIME, tag.getDescription());
+ }
+ return;
+ }
+
+ if(tag.getTagName().equals("F-Number") ||
+ tag.getTagType() == 33437) {
+ Object fnumber = directory.getObject(tag.getTagType());
+ if(fnumber instanceof Rational) {
+ metadata.set(Metadata.F_NUMBER, ((Rational)fnumber).doubleValue());
+ } else {
+ metadata.set(Metadata.F_NUMBER, tag.getDescription());
+ }
+ return;
+ }
+
+ if(tag.getTagName().equals("Focal Length") ||
+ tag.getTagType() == 37386) {
+ Object length = directory.getObject(tag.getTagType());
+ if(length instanceof Rational) {
+ metadata.set(Metadata.FOCAL_LENGTH, ((Rational)length).doubleValue());
+ } else {
+ metadata.set(Metadata.FOCAL_LENGTH, tag.getDescription());
+ }
+ return;
+ }
+
+ if(tag.getTagName().equals("ISO Speed Ratings") ||
+ tag.getTagType() == 34855) {
+ metadata.set(Metadata.ISO_SPEED_RATINGS, tag.getDescription());
+ return;
+ }
+
+ if(tag.getTagName().equals("Make") ||
+ tag.getTagType() == 271) {
+ metadata.set(Metadata.EQUIPMENT_MAKE, tag.getDescription());
+ return;
+ }
+ if(tag.getTagName().equals("Model") ||
+ tag.getTagType() == 272) {
+ metadata.set(Metadata.EQUIPMENT_MODEL, tag.getDescription());
+ return;
+ }
+
+ if(tag.getTagName().equals("Orientation") ||
+ tag.getTagType() == 274) {
+ Object length = directory.getObject(tag.getTagType());
+ if(length instanceof Integer) {
+ metadata.set(Metadata.ORIENTATION, Integer.toString( ((Integer)length).intValue() ));
+ } else {
+ metadata.set(Metadata.ORIENTATION, tag.getDescription());
+ }
+ return;
+ }
+
+ if(tag.getTagName().equals("Software") ||
+ tag.getTagType() == 305) {
+ metadata.set(Metadata.SOFTWARE, tag.getDescription());
+ return;
+ }
+
+ if(tag.getTagName().equals("X Resolution") ||
+ tag.getTagType() == 282) {
+ Object resolution = directory.getObject(tag.getTagType());
+ if(resolution instanceof Rational) {
+ metadata.set(Metadata.RESOLUTION_HORIZONTAL, ((Rational)resolution).doubleValue());
+ } else {
+ metadata.set(Metadata.RESOLUTION_HORIZONTAL, tag.getDescription());
+ }
+ return;
+ }
+ if(tag.getTagName().equals("Y Resolution") ||
+ tag.getTagType() == 283) {
+ Object resolution = directory.getObject(tag.getTagType());
+ if(resolution instanceof Rational) {
+ metadata.set(Metadata.RESOLUTION_VERTICAL, ((Rational)resolution).doubleValue());
+ } else {
+ metadata.set(Metadata.RESOLUTION_VERTICAL, tag.getDescription());
+ }
+ return;
+ }
+ if(tag.getTagName().equals("Resolution Unit") ||
+ tag.getTagType() == 296) {
+ metadata.set(Metadata.RESOLUTION_UNIT, tag.getDescription());
+ return;
+ }
+
if(tag.getTagName().equals("Keywords") ||
tag.getTagType() == 537) {
metadata.set(Metadata.KEYWORDS, tag.getDescription());
return;
}
+
if(tag.getTagName().equals("Jpeg Comment")) {
metadata.set(Metadata.COMMENTS, tag.getDescription());
return;
}
+// System.err.println(directory.getObject(tag.getTagType()) + " " + directory.getObject(tag.getTagType()).getClass());
// File info
// Metadata Extractor does not read XMP so we need to use the values from Iptc or EXIF
Modified: tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/jpeg/JpegParserTest.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/jpeg/JpegParserTest.java?rev=992368&r1=992367&r2=992368&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/jpeg/JpegParserTest.java (original)
+++ tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/jpeg/JpegParserTest.java Fri Sep 3 16:48:05 2010
@@ -44,6 +44,20 @@ public class JpegParserTest extends Test
assertEquals("8", metadata.get(Metadata.BITS_PER_SAMPLE));
assertEquals(null, metadata.get(Metadata.SAMPLES_PER_PIXEL));
+ assertEquals("6.25E-4", metadata.get(Metadata.EXPOSURE_TIME)); // 1/1600
+ assertEquals("5.6", metadata.get(Metadata.F_NUMBER));
+ assertEquals("194.0", metadata.get(Metadata.FOCAL_LENGTH));
+ assertEquals("400", metadata.get(Metadata.ISO_SPEED_RATINGS));
+ assertEquals("Canon", metadata.get(Metadata.EQUIPMENT_MAKE));
+ assertEquals("Canon EOS 40D", metadata.get(Metadata.EQUIPMENT_MODEL));
+ assertEquals("Adobe Photoshop CS3 Macintosh", metadata.get(Metadata.SOFTWARE));
+ assertEquals(null, metadata.get(Metadata.ORIENTATION)); // Not present
+ assertEquals("240.0", metadata.get(Metadata.RESOLUTION_HORIZONTAL));
+ assertEquals("240.0", metadata.get(Metadata.RESOLUTION_VERTICAL));
+ assertEquals("Inch", metadata.get(Metadata.RESOLUTION_UNIT));
+
+ // TODO - Flash
+
// Common tags
assertEquals("Date/Time for when the photo was taken, unspecified time zone",
"2009-10-02T23:02:49", metadata.get(Metadata.DATE));
@@ -72,6 +86,18 @@ public class JpegParserTest extends Test
assertEquals("8", metadata.get(Metadata.BITS_PER_SAMPLE));
assertEquals(null, metadata.get(Metadata.SAMPLES_PER_PIXEL));
+ assertEquals("6.25E-4", metadata.get(Metadata.EXPOSURE_TIME)); // 1/1600
+ assertEquals("5.6", metadata.get(Metadata.F_NUMBER));
+ assertEquals("194.0", metadata.get(Metadata.FOCAL_LENGTH));
+ assertEquals("400", metadata.get(Metadata.ISO_SPEED_RATINGS));
+ assertEquals("Canon", metadata.get(Metadata.EQUIPMENT_MAKE));
+ assertEquals("Canon EOS 40D", metadata.get(Metadata.EQUIPMENT_MODEL));
+ assertEquals("Adobe Photoshop CS3 Macintosh", metadata.get(Metadata.SOFTWARE));
+ assertEquals(null, metadata.get(Metadata.ORIENTATION)); // Not present
+ assertEquals("240.0", metadata.get(Metadata.RESOLUTION_HORIZONTAL));
+ assertEquals("240.0", metadata.get(Metadata.RESOLUTION_VERTICAL));
+ assertEquals("Inch", metadata.get(Metadata.RESOLUTION_UNIT));
+
// Common tags
assertEquals("Date/Time Original for when the photo was taken, unspecified time zone",
"2009-08-11T09:09:45", metadata.get(Metadata.ORIGINAL_DATE));
@@ -95,5 +121,23 @@ public class JpegParserTest extends Test
"ne, Sweden.\n(new line)", metadata.get(Metadata.DESCRIPTION));
assertEquals("Some Tourist", metadata.get(Metadata.AUTHOR));
assertEquals("grazelands nature reserve bird watching coast", metadata.get(Metadata.KEYWORDS));
+
+ // Core EXIF/TIFF tags
+ assertEquals("103", metadata.get(Metadata.IMAGE_WIDTH));
+ assertEquals("77", metadata.get(Metadata.IMAGE_LENGTH));
+ assertEquals("8", metadata.get(Metadata.BITS_PER_SAMPLE));
+ assertEquals(null, metadata.get(Metadata.SAMPLES_PER_PIXEL));
+
+ assertEquals("1.0E-6", metadata.get(Metadata.EXPOSURE_TIME)); // 1/1000000
+ assertEquals("2.8", metadata.get(Metadata.F_NUMBER));
+ assertEquals("4.6", metadata.get(Metadata.FOCAL_LENGTH));
+ assertEquals("114", metadata.get(Metadata.ISO_SPEED_RATINGS));
+ assertEquals(null, metadata.get(Metadata.EQUIPMENT_MAKE));
+ assertEquals(null, metadata.get(Metadata.EQUIPMENT_MODEL));
+ assertEquals(null, metadata.get(Metadata.SOFTWARE));
+ assertEquals("1", metadata.get(Metadata.ORIENTATION)); // Present: 1 = 0th row at top, 0th column at left
+ assertEquals("300.0", metadata.get(Metadata.RESOLUTION_HORIZONTAL));
+ assertEquals("300.0", metadata.get(Metadata.RESOLUTION_VERTICAL));
+ assertEquals("Inch", metadata.get(Metadata.RESOLUTION_UNIT));
}
}