You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ju...@apache.org on 2011/09/02 18:39:03 UTC

svn commit: r1164617 - in /tika/trunk/tika-parsers/src: main/java/org/apache/tika/parser/image/ImageMetadataExtractor.java main/java/org/apache/tika/parser/image/ImageParser.java test/java/org/apache/tika/parser/image/ImageParserTest.java

Author: jukka
Date: Fri Sep  2 16:39:02 2011
New Revision: 1164617

URL: http://svn.apache.org/viewvc?rev=1164617&view=rev
Log:
TIKA-702: Cannot compile Tika with Java 7 (ImageMetadataExtractor.java)

Avoid using the troublesome JpegMetadataReader class from metadata-extractor.

Normalize extracted metadata values (trim surrounding whitespace and convert boolean strings such as "TRUE" to lower case) so that the same results are returned when running on Java 7.

Modified:
    tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/image/ImageMetadataExtractor.java
    tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/image/ImageParser.java
    tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/image/ImageParserTest.java

Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/image/ImageMetadataExtractor.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/image/ImageMetadataExtractor.java?rev=1164617&r1=1164616&r2=1164617&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/image/ImageMetadataExtractor.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/image/ImageMetadataExtractor.java Fri Sep  2 16:39:02 2011
@@ -19,6 +19,7 @@ package org.apache.tika.parser.image;
 import java.io.File;
 import java.io.IOException;
 import java.io.InputStream;
+import java.lang.reflect.Constructor;
 import java.text.DecimalFormat;
 import java.text.DecimalFormatSymbols;
 import java.text.SimpleDateFormat;
@@ -34,19 +35,24 @@ import org.apache.tika.metadata.Metadata
 import org.apache.tika.metadata.Property;
 import org.xml.sax.SAXException;
 
-import com.drew.imaging.jpeg.JpegMetadataReader;
 import com.drew.imaging.jpeg.JpegProcessingException;
+import com.drew.imaging.jpeg.JpegSegmentReader;
 import com.drew.imaging.tiff.TiffMetadataReader;
 import com.drew.imaging.tiff.TiffProcessingException;
 import com.drew.lang.Rational;
 import com.drew.metadata.Directory;
 import com.drew.metadata.MetadataException;
+import com.drew.metadata.MetadataReader;
 import com.drew.metadata.Tag;
 import com.drew.metadata.exif.ExifDirectory;
+import com.drew.metadata.exif.ExifReader;
 import com.drew.metadata.exif.GpsDirectory;
 import com.drew.metadata.iptc.IptcDirectory;
+import com.drew.metadata.iptc.IptcReader;
 import com.drew.metadata.jpeg.JpegCommentDirectory;
+import com.drew.metadata.jpeg.JpegCommentReader;
 import com.drew.metadata.jpeg.JpegDirectory;
+import com.drew.metadata.jpeg.JpegReader;
 
 /**
  * Uses the <a href="http://www.drewnoakes.com/code/exif/">Metadata Extractor</a> library
@@ -85,17 +91,42 @@ public class ImageMetadataExtractor {
     public void parseJpeg(File file)
             throws IOException, SAXException, TikaException {
         try {
-            com.drew.metadata.Metadata jpegMetadata =
-                JpegMetadataReader.readMetadata(file);
-
-            handle(jpegMetadata);
+            JpegSegmentReader reader = new JpegSegmentReader(file);
+            extractMetadataFromSegment(
+                    reader, JpegSegmentReader.SEGMENT_APP1, ExifReader.class);
+            extractMetadataFromSegment(
+                    reader, JpegSegmentReader.SEGMENT_APPD, IptcReader.class);
+            extractMetadataFromSegment(
+                    reader, JpegSegmentReader.SEGMENT_SOF0, JpegReader.class);
+            extractMetadataFromSegment(
+                    reader, JpegSegmentReader.SEGMENT_COM, JpegCommentReader.class);
         } catch (JpegProcessingException e) {
             throw new TikaException("Can't read JPEG metadata", e);
-        } catch (MetadataException e) {
-            throw new TikaException("Can't read JPEG metadata", e);
         }
     }
-    
+
+    private void extractMetadataFromSegment(
+            JpegSegmentReader reader, byte marker,
+            Class<? extends MetadataReader> klass) {
+        try {
+            Constructor<? extends MetadataReader> constructor =
+                    klass.getConstructor(byte[].class);
+
+            int n = reader.getSegmentCount(marker);
+            for (int i = 0; i < n; i++) {
+                byte[] segment = reader.readSegment(marker, i);
+
+                com.drew.metadata.Metadata metadata =
+                        new com.drew.metadata.Metadata();
+                constructor.newInstance(segment).extract(metadata);
+
+                handle(metadata);
+            }
+        } catch (Exception e) {
+            // Unable to read this kind of metadata, so skip
+        }
+    }
+
     protected void parseTiff(InputStream stream)
             throws IOException, SAXException, TikaException {
         try {
@@ -190,7 +221,12 @@ public class ImageMetadataExtractor {
                 String name = tag.getTagName();
                 if (!MetadataFields.isMetadataField(name)) {
                    try {
-                      String value = tag.getDescription();
+                      String value = tag.getDescription().trim();
+                      if (Boolean.TRUE.toString().equalsIgnoreCase(value)) {
+                          value = Boolean.TRUE.toString();
+                      } else if (Boolean.FALSE.toString().equalsIgnoreCase(value)) {
+                          value = Boolean.FALSE.toString();
+                      }
                       metadata.set(name, value);
                    } catch(MetadataException e) {
                       // Either something's corrupt, or it's a JPEG tag

Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/image/ImageParser.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/image/ImageParser.java?rev=1164617&r1=1164616&r2=1164617&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/image/ImageParser.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/image/ImageParser.java Fri Sep  2 16:39:02 2011
@@ -152,15 +152,17 @@ public class ImageParser extends Abstrac
 
             int length = map.getLength();
             if (length == 1) {
-                metadata.add(parents, map.item(0).getNodeValue());
+                metadata.add(parents, normalize(map.item(0).getNodeValue()));
             } else if (length > 1) {
-                StringBuffer value = new StringBuffer();
+                StringBuilder value = new StringBuilder();
                 for (int i = 0; i < length; i++) {
                     if (i > 0) {
                         value.append(", ");
                     }
                     Node attr = map.item(i);
-                    value.append(attr.getNodeName()).append("=").append(attr.getNodeValue());
+                    value.append(attr.getNodeName());
+                    value.append("=");
+                    value.append(normalize(attr.getNodeValue()));
                 }
                 metadata.add(parents, value.toString());
             }
@@ -174,4 +176,18 @@ public class ImageParser extends Abstrac
         }
     }
 
+    private static String normalize(String value) {
+        if (value != null) {
+            value = value.trim();
+        } else {
+            value = "";
+        }
+        if (Boolean.TRUE.toString().equalsIgnoreCase(value)) {
+            return Boolean.TRUE.toString();
+        } else if (Boolean.FALSE.toString().equalsIgnoreCase(value)) {
+            return Boolean.FALSE.toString();
+        }
+        return value;
+    }
+
 }

Modified: tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/image/ImageParserTest.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/image/ImageParserTest.java?rev=1164617&r1=1164616&r2=1164617&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/image/ImageParserTest.java (original)
+++ tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/image/ImageParserTest.java Fri Sep  2 16:39:02 2011
@@ -38,7 +38,7 @@ public class ImageParserTest extends Tes
 
         assertEquals("75", metadata.get("height"));
         assertEquals("100", metadata.get("width"));
-        assertEquals("8 8 8 ", metadata.get("Data BitsPerSample"));
+        assertEquals("8 8 8", metadata.get("Data BitsPerSample"));
         assertEquals("1.0", metadata.get("Dimension PixelAspectRatio"));
         assertEquals("0", metadata.get("Dimension VerticalPhysicalPixelSpacing"));
         assertEquals("0", metadata.get("Dimension HorizontalPhysicalPixelSpacing"));
@@ -59,7 +59,7 @@ public class ImageParserTest extends Tes
 
         assertEquals("75", metadata.get("height"));
         assertEquals("100", metadata.get("width"));
-        assertEquals("TRUE", metadata.get("Compression Lossless"));
+        assertEquals("true", metadata.get("Compression Lossless"));
         assertEquals("Normal", metadata.get("Dimension ImageOrientation"));
         assertEquals("lzw", metadata.get("Compression CompressionTypeName"));
         assertEquals("0", metadata.get("Dimension HorizontalPixelOffset"));
@@ -70,7 +70,7 @@ public class ImageParserTest extends Tes
         assertEquals("RGB", metadata.get("Chroma ColorSpaceType"));
         assertEquals("Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements.  See the NOTICE file distributed with this work for additional information regarding copyright ownership.", metadata.get("CommentExtensions CommentExtension"));
         assertEquals("value=Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements.  See the NOTICE file distributed with this work for additional information regarding copyright ownership., encoding=ISO-8859-1, compression=none", metadata.get("Text TextEntry"));
-        assertEquals("TRUE", metadata.get("Chroma BlackIsZero"));
+        assertEquals("true", metadata.get("Chroma BlackIsZero"));
         assertEquals("disposalMethod=none, userInputFlag=false, transparentColorFlag=false, delayTime=0, transparentColorIndex=0", metadata.get("GraphicControlExtension"));
         assertEquals("0", metadata.get("Dimension VerticalPixelOffset"));
         assertEquals("image/gif", metadata.get("Content-Type"));