You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ni...@apache.org on 2010/09/03 18:48:05 UTC

svn commit: r992368 - in /tika/trunk: tika-core/src/main/java/org/apache/tika/metadata/ tika-parsers/src/main/java/org/apache/tika/parser/image/ tika-parsers/src/test/java/org/apache/tika/parser/jpeg/

Author: nick
Date: Fri Sep  3 16:48:05 2010
New Revision: 992368

URL: http://svn.apache.org/viewvc?rev=992368&view=rev
Log:
Add several more common EXIF tags to the TIFF metadata namespace, and have the EXIF parser also output property-typed tags for these (TIKA-504)

Modified:
    tika/trunk/tika-core/src/main/java/org/apache/tika/metadata/Metadata.java
    tika/trunk/tika-core/src/main/java/org/apache/tika/metadata/TIFF.java
    tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/image/ImageMetadataExtractor.java
    tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/jpeg/JpegParserTest.java

Modified: tika/trunk/tika-core/src/main/java/org/apache/tika/metadata/Metadata.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/metadata/Metadata.java?rev=992368&r1=992367&r2=992368&view=diff
==============================================================================
--- tika/trunk/tika-core/src/main/java/org/apache/tika/metadata/Metadata.java (original)
+++ tika/trunk/tika-core/src/main/java/org/apache/tika/metadata/Metadata.java Fri Sep  3 16:48:05 2010
@@ -311,6 +311,22 @@ public class Metadata implements Creativ
     }
 
     /**
+     * Sets the real or rational value of the identified metadata property.
+     *
+     * @since Apache Tika 0.8
+     * @param property simple real or simple rational property definition
+     * @param value    property value
+     */
+    public void set(Property property, double value) {
+        if(property.getPropertyType() != Property.PropertyType.SIMPLE)
+            throw new PropertyTypeException(Property.PropertyType.SIMPLE, property.getPropertyType());
+        if(property.getValueType() != Property.ValueType.REAL &&
+              property.getValueType() != Property.ValueType.RATIONAL)
+            throw new PropertyTypeException(Property.ValueType.REAL, property.getValueType());
+        set(property.getName(), Double.toString(value));
+    }
+
+    /**
      * Sets the date value of the identified metadata property.
      *
      * @since Apache Tika 0.8

Modified: tika/trunk/tika-core/src/main/java/org/apache/tika/metadata/TIFF.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/metadata/TIFF.java?rev=992368&r1=992367&r2=992368&view=diff
==============================================================================
--- tika/trunk/tika-core/src/main/java/org/apache/tika/metadata/TIFF.java (original)
+++ tika/trunk/tika-core/src/main/java/org/apache/tika/metadata/TIFF.java Fri Sep  3 16:48:05 2010
@@ -52,6 +52,85 @@ public interface TIFF {
         Property.internalInteger("tiff:SamplesPerPixel");
 
     /**
+     * "Exposure time in seconds."
+     */
+    Property EXPOSURE_TIME =
+       Property.internalRational("exif:ExposureTime");
+    
+    //  TODO "exif:Flash"
+    
+    /**
+     * "F-Number."
+     * The f-number is the focal length divided by the "effective" aperture 
+     *  diameter. It is a dimensionless number that is a measure of lens speed. 
+     */
+    Property F_NUMBER =
+       Property.internalRational("exif:FNumber");
+    
+    /**
+     * "Focal length of the lens, in millimeters."
+     */
+    Property FOCAL_LENGTH =
+       Property.internalRational("exif:FocalLength");
+    
+    /**
+     * "ISO Speed and ISO Latitude of the input device as specified in ISO 12232"
+     */
+    Property ISO_SPEED_RATINGS =
+       Property.internalIntegerSequence("exif:IsoSpeedRatings");
+    
+    /**
+     * "Manufacturer of the recording equipment."
+     */
+    Property EQUIPMENT_MAKE =
+       Property.internalText("tiff:Make");
+    
+    /**
+     * "Model name or number of the recording equipment."
+     */
+    Property EQUIPMENT_MODEL =
+       Property.internalText("tiff:Model");
+    
+    /**
+     * "Software or firmware used to generate the image."
+     */
+    Property SOFTWARE =
+       Property.internalText("tiff:Software");
+
+    /**
+     * "The Orientation of the image."
+     *  1 = 0th row at top, 0th column at left
+     *  2 = 0th row at top, 0th column at right
+     *  3 = 0th row at bottom, 0th column at right
+     *  4 = 0th row at bottom, 0th column at left
+     *  5 = 0th row at left, 0th column at top
+     *  6 = 0th row at right, 0th column at top
+     *  7 = 0th row at right, 0th column at bottom
+     *  8 = 0th row at left, 0th column at bottom
+     */
+    Property ORIENTATION =
+       Property.internalClosedChoise("tiff:Orientation", "1", "2", "3", "4", "5", "6", "7", "8");
+    
+    /**
+     * "Horizontal resolution in pixels per unit."
+     */
+    Property RESOLUTION_HORIZONTAL =
+       Property.internalRational("tiff:XResolution");
+    
+    /**
+     * "Vertical resolution in pixels per unit."
+     */
+    Property RESOLUTION_VERTICAL =
+       Property.internalRational("tiff:YResolution");
+    
+    /**
+     * "Units used for Horizontal and Vertical Resolutions."
+     * One of "Inch" or "cm"
+     */
+    Property RESOLUTION_UNIT =
+       Property.internalClosedChoise("tiff:ResolutionUnit", "Inch", "cm"); 
+    
+    /**
      * "Date and time when original image was generated"
      */
     Property ORIGINAL_DATE =

Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/image/ImageMetadataExtractor.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/image/ImageMetadataExtractor.java?rev=992368&r1=992367&r2=992368&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/image/ImageMetadataExtractor.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/image/ImageMetadataExtractor.java Fri Sep  3 16:48:05 2010
@@ -20,6 +20,8 @@ import java.io.IOException;
 import java.io.InputStream;
 import java.text.DecimalFormat;
 import java.text.DecimalFormatSymbols;
+import java.text.SimpleDateFormat;
+import java.util.Date;
 import java.util.Iterator;
 import java.util.Locale;
 import java.util.regex.Matcher;
@@ -35,6 +37,7 @@ import com.drew.imaging.jpeg.JpegMetadat
 import com.drew.imaging.jpeg.JpegProcessingException;
 import com.drew.imaging.tiff.TiffMetadataReader;
 import com.drew.imaging.tiff.TiffProcessingException;
+import com.drew.lang.Rational;
 import com.drew.metadata.Directory;
 import com.drew.metadata.MetadataException;
 import com.drew.metadata.Tag;
@@ -80,7 +83,7 @@ public class ImageMetadataExtractor {
              while (tags.hasNext()) {
                 Tag tag = (Tag)tags.next();
                 metadata.set(tag.getTagName(), tag.getDescription());
-                handleCommonImageTags(metadata, tag);
+                handleCommonImageTags(metadata, tag, directory);
              }
              handleGeoImageTags(metadata);
           }
@@ -142,44 +145,139 @@ public class ImageMetadataExtractor {
     private static final DecimalFormat LAT_LONG_FORMAT =
         new DecimalFormat("##0.0####", new DecimalFormatSymbols(Locale.US));
 
-    private static void handleDate(Metadata metadata, Property property, Tag tag) throws MetadataException {
-       // Ensure it's in the right format
-       String date = tag.getDescription();
-       int splitAt = date.indexOf(' '); 
-       if(splitAt > -1) {
-           String datePart = date.substring(0, splitAt);
-           String timePart = date.substring(splitAt+1);
-           date = datePart.replace(':', '-') + 'T' + timePart;
-       }
-       metadata.set(property, date);
+    /**
+     * We normally won't know what timezone our dates belong to
+     */
+    private static final SimpleDateFormat DATE_UNSPECIFIED_TZ = 
+       new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss");
+    private static synchronized void handleDate(Property property, Metadata metadata, Tag tag, Directory directory) throws MetadataException {
+       Date date = directory.getDate(tag.getTagType());
+       String dateString = DATE_UNSPECIFIED_TZ.format(date);
+       metadata.set(property, dateString);
     }
-
+    
     /**
      * Maps common TIFF and EXIF tags onto the Tika
      *  TIFF image metadata namespace.
      */
-    public static void handleCommonImageTags(Metadata metadata, Tag tag) throws MetadataException {
+    public static void handleCommonImageTags(Metadata metadata, Tag tag, Directory directory) throws MetadataException {
         // Core tags
         if(tag.getTagName().equals("Date/Time") ||
                 tag.getTagType() == 306) {
-            handleDate(metadata, Metadata.DATE, tag);
-            handleDate(metadata, Metadata.LAST_MODIFIED, tag);
+            handleDate(Metadata.DATE, metadata, tag, directory);
+            metadata.set(Metadata.LAST_MODIFIED, metadata.get(Metadata.DATE));
             return;
         }
         if(tag.getTagName().equals("Date/Time Original") ||
-              tag.getTagType() == 36867) {
-          handleDate(metadata, Metadata.ORIGINAL_DATE, tag);
-          return;
-      }
+                tag.getTagType() == 36867) {
+           handleDate(Metadata.ORIGINAL_DATE, metadata, tag, directory);
+           return;
+        }
+        
+        if(tag.getTagName().equals("Exposure Time") ||
+                tag.getTagType() == 33434) {
+           Object exposure = directory.getObject(tag.getTagType());
+           if(exposure instanceof Rational) {
+              metadata.set(Metadata.EXPOSURE_TIME, ((Rational)exposure).doubleValue());
+           } else {
+              metadata.set(Metadata.EXPOSURE_TIME, tag.getDescription());
+           }
+           return;
+        }
+        
+        if(tag.getTagName().equals("F-Number") ||
+                tag.getTagType() == 33437) {
+           Object fnumber = directory.getObject(tag.getTagType());
+           if(fnumber instanceof Rational) {
+              metadata.set(Metadata.F_NUMBER, ((Rational)fnumber).doubleValue());
+           } else {
+              metadata.set(Metadata.F_NUMBER, tag.getDescription());
+           }
+           return;
+        }
+        
+        if(tag.getTagName().equals("Focal Length") ||
+                tag.getTagType() == 37386) {
+           Object length = directory.getObject(tag.getTagType());
+           if(length instanceof Rational) {
+              metadata.set(Metadata.FOCAL_LENGTH, ((Rational)length).doubleValue());
+           } else {
+              metadata.set(Metadata.FOCAL_LENGTH, tag.getDescription());
+           }
+           return;
+        }
+        
+        if(tag.getTagName().equals("ISO Speed Ratings") ||
+                tag.getTagType() == 34855) {
+           metadata.set(Metadata.ISO_SPEED_RATINGS, tag.getDescription());
+           return;
+        }
+      
+        if(tag.getTagName().equals("Make") ||
+                tag.getTagType() == 271) {
+           metadata.set(Metadata.EQUIPMENT_MAKE, tag.getDescription());
+           return;
+        }
+        if(tag.getTagName().equals("Model") ||
+                tag.getTagType() == 272) {
+           metadata.set(Metadata.EQUIPMENT_MODEL, tag.getDescription());
+           return;
+        }
+      
+        if(tag.getTagName().equals("Orientation") ||
+                tag.getTagType() == 274) {
+           Object length = directory.getObject(tag.getTagType());
+           if(length instanceof Integer) {
+              metadata.set(Metadata.ORIENTATION, Integer.toString( ((Integer)length).intValue() ));
+           } else {
+              metadata.set(Metadata.ORIENTATION, tag.getDescription());
+           }
+           return;
+        }
+        
+        if(tag.getTagName().equals("Software") ||
+                tag.getTagType() == 305) {
+           metadata.set(Metadata.SOFTWARE, tag.getDescription());
+           return;
+        }
+        
+        if(tag.getTagName().equals("X Resolution") ||
+                tag.getTagType() == 282) {
+           Object resolution = directory.getObject(tag.getTagType());
+           if(resolution instanceof Rational) {
+              metadata.set(Metadata.RESOLUTION_HORIZONTAL, ((Rational)resolution).doubleValue());
+           } else {
+              metadata.set(Metadata.RESOLUTION_HORIZONTAL, tag.getDescription());
+           }
+           return;
+        }
+        if(tag.getTagName().equals("Y Resolution") ||
+                tag.getTagType() == 283) {
+           Object resolution = directory.getObject(tag.getTagType());
+           if(resolution instanceof Rational) {
+              metadata.set(Metadata.RESOLUTION_VERTICAL, ((Rational)resolution).doubleValue());
+           } else {
+              metadata.set(Metadata.RESOLUTION_VERTICAL, tag.getDescription());
+           }
+           return;
+        }
+        if(tag.getTagName().equals("Resolution Unit") ||
+                tag.getTagType() == 296) {
+           metadata.set(Metadata.RESOLUTION_UNIT, tag.getDescription());
+           return;
+        }
+
         if(tag.getTagName().equals("Keywords") ||
                 tag.getTagType() == 537) {
             metadata.set(Metadata.KEYWORDS, tag.getDescription());
             return;
         }
+        
         if(tag.getTagName().equals("Jpeg Comment")) {
             metadata.set(Metadata.COMMENTS, tag.getDescription());
             return;
         }
+//      System.err.println(directory.getObject(tag.getTagType()) + " " + directory.getObject(tag.getTagType()).getClass());
 
         // File info
         // Metadata Extractor does not read XMP so we need to use the values from Iptc or EXIF

Modified: tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/jpeg/JpegParserTest.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/jpeg/JpegParserTest.java?rev=992368&r1=992367&r2=992368&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/jpeg/JpegParserTest.java (original)
+++ tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/jpeg/JpegParserTest.java Fri Sep  3 16:48:05 2010
@@ -44,6 +44,20 @@ public class JpegParserTest extends Test
         assertEquals("8", metadata.get(Metadata.BITS_PER_SAMPLE));
         assertEquals(null, metadata.get(Metadata.SAMPLES_PER_PIXEL));
         
+        assertEquals("6.25E-4", metadata.get(Metadata.EXPOSURE_TIME)); // 1/1600
+        assertEquals("5.6", metadata.get(Metadata.F_NUMBER));
+        assertEquals("194.0", metadata.get(Metadata.FOCAL_LENGTH));
+        assertEquals("400", metadata.get(Metadata.ISO_SPEED_RATINGS));
+        assertEquals("Canon", metadata.get(Metadata.EQUIPMENT_MAKE));
+        assertEquals("Canon EOS 40D", metadata.get(Metadata.EQUIPMENT_MODEL));
+        assertEquals("Adobe Photoshop CS3 Macintosh", metadata.get(Metadata.SOFTWARE));
+        assertEquals(null, metadata.get(Metadata.ORIENTATION)); // Not present
+        assertEquals("240.0", metadata.get(Metadata.RESOLUTION_HORIZONTAL));
+        assertEquals("240.0", metadata.get(Metadata.RESOLUTION_VERTICAL));
+        assertEquals("Inch", metadata.get(Metadata.RESOLUTION_UNIT));
+        
+        // TODO - Flash
+        
         // Common tags
         assertEquals("Date/Time for when the photo was taken, unspecified time zone",
                 "2009-10-02T23:02:49", metadata.get(Metadata.DATE));
@@ -72,6 +86,18 @@ public class JpegParserTest extends Test
         assertEquals("8", metadata.get(Metadata.BITS_PER_SAMPLE));
         assertEquals(null, metadata.get(Metadata.SAMPLES_PER_PIXEL));
         
+        assertEquals("6.25E-4", metadata.get(Metadata.EXPOSURE_TIME)); // 1/1600
+        assertEquals("5.6", metadata.get(Metadata.F_NUMBER));
+        assertEquals("194.0", metadata.get(Metadata.FOCAL_LENGTH));
+        assertEquals("400", metadata.get(Metadata.ISO_SPEED_RATINGS));
+        assertEquals("Canon", metadata.get(Metadata.EQUIPMENT_MAKE));
+        assertEquals("Canon EOS 40D", metadata.get(Metadata.EQUIPMENT_MODEL));
+        assertEquals("Adobe Photoshop CS3 Macintosh", metadata.get(Metadata.SOFTWARE));
+        assertEquals(null, metadata.get(Metadata.ORIENTATION)); // Not present
+        assertEquals("240.0", metadata.get(Metadata.RESOLUTION_HORIZONTAL));
+        assertEquals("240.0", metadata.get(Metadata.RESOLUTION_VERTICAL));
+        assertEquals("Inch", metadata.get(Metadata.RESOLUTION_UNIT));
+        
         // Common tags
         assertEquals("Date/Time Original for when the photo was taken, unspecified time zone",
                 "2009-08-11T09:09:45", metadata.get(Metadata.ORIGINAL_DATE));
@@ -95,5 +121,23 @@ public class JpegParserTest extends Test
         		"ne, Sweden.\n(new line)", metadata.get(Metadata.DESCRIPTION));
         assertEquals("Some Tourist", metadata.get(Metadata.AUTHOR));
         assertEquals("grazelands nature reserve bird watching coast", metadata.get(Metadata.KEYWORDS));
+        
+        // Core EXIF/TIFF tags
+        assertEquals("103", metadata.get(Metadata.IMAGE_WIDTH));
+        assertEquals("77", metadata.get(Metadata.IMAGE_LENGTH));
+        assertEquals("8", metadata.get(Metadata.BITS_PER_SAMPLE));
+        assertEquals(null, metadata.get(Metadata.SAMPLES_PER_PIXEL));
+        
+        assertEquals("1.0E-6", metadata.get(Metadata.EXPOSURE_TIME)); // 1/1000000
+        assertEquals("2.8", metadata.get(Metadata.F_NUMBER));
+        assertEquals("4.6", metadata.get(Metadata.FOCAL_LENGTH));
+        assertEquals("114", metadata.get(Metadata.ISO_SPEED_RATINGS));
+        assertEquals(null, metadata.get(Metadata.EQUIPMENT_MAKE));
+        assertEquals(null, metadata.get(Metadata.EQUIPMENT_MODEL));
+        assertEquals(null, metadata.get(Metadata.SOFTWARE));
+        assertEquals("1", metadata.get(Metadata.ORIENTATION)); // 1 = 0th row at top, 0th column at left
+        assertEquals("300.0", metadata.get(Metadata.RESOLUTION_HORIZONTAL));
+        assertEquals("300.0", metadata.get(Metadata.RESOLUTION_VERTICAL));
+        assertEquals("Inch", metadata.get(Metadata.RESOLUTION_UNIT));
     }
 }