You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ju...@apache.org on 2011/09/02 18:39:03 UTC
svn commit: r1164617 - in /tika/trunk/tika-parsers/src:
main/java/org/apache/tika/parser/image/ImageMetadataExtractor.java
main/java/org/apache/tika/parser/image/ImageParser.java
test/java/org/apache/tika/parser/image/ImageParserTest.java
Author: jukka
Date: Fri Sep 2 16:39:02 2011
New Revision: 1164617
URL: http://svn.apache.org/viewvc?rev=1164617&view=rev
Log:
TIKA-702: Cannot compile Tika with Java 7 (ImageMetadataExtractor.java)
Avoid using the troublesome JpegMetadataReader class from metadata-extractor.
Normalize extracted metadata values to prevent different results being returned when running on Java 7.
Modified:
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/image/ImageMetadataExtractor.java
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/image/ImageParser.java
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/image/ImageParserTest.java
Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/image/ImageMetadataExtractor.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/image/ImageMetadataExtractor.java?rev=1164617&r1=1164616&r2=1164617&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/image/ImageMetadataExtractor.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/image/ImageMetadataExtractor.java Fri Sep 2 16:39:02 2011
@@ -19,6 +19,7 @@ package org.apache.tika.parser.image;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
+import java.lang.reflect.Constructor;
import java.text.DecimalFormat;
import java.text.DecimalFormatSymbols;
import java.text.SimpleDateFormat;
@@ -34,19 +35,24 @@ import org.apache.tika.metadata.Metadata
import org.apache.tika.metadata.Property;
import org.xml.sax.SAXException;
-import com.drew.imaging.jpeg.JpegMetadataReader;
import com.drew.imaging.jpeg.JpegProcessingException;
+import com.drew.imaging.jpeg.JpegSegmentReader;
import com.drew.imaging.tiff.TiffMetadataReader;
import com.drew.imaging.tiff.TiffProcessingException;
import com.drew.lang.Rational;
import com.drew.metadata.Directory;
import com.drew.metadata.MetadataException;
+import com.drew.metadata.MetadataReader;
import com.drew.metadata.Tag;
import com.drew.metadata.exif.ExifDirectory;
+import com.drew.metadata.exif.ExifReader;
import com.drew.metadata.exif.GpsDirectory;
import com.drew.metadata.iptc.IptcDirectory;
+import com.drew.metadata.iptc.IptcReader;
import com.drew.metadata.jpeg.JpegCommentDirectory;
+import com.drew.metadata.jpeg.JpegCommentReader;
import com.drew.metadata.jpeg.JpegDirectory;
+import com.drew.metadata.jpeg.JpegReader;
/**
* Uses the <a href="http://www.drewnoakes.com/code/exif/">Metadata Extractor</a> library
@@ -85,17 +91,42 @@ public class ImageMetadataExtractor {
public void parseJpeg(File file)
throws IOException, SAXException, TikaException {
try {
- com.drew.metadata.Metadata jpegMetadata =
- JpegMetadataReader.readMetadata(file);
-
- handle(jpegMetadata);
+ JpegSegmentReader reader = new JpegSegmentReader(file);
+ extractMetadataFromSegment(
+ reader, JpegSegmentReader.SEGMENT_APP1, ExifReader.class);
+ extractMetadataFromSegment(
+ reader, JpegSegmentReader.SEGMENT_APPD, IptcReader.class);
+ extractMetadataFromSegment(
+ reader, JpegSegmentReader.SEGMENT_SOF0, JpegReader.class);
+ extractMetadataFromSegment(
+ reader, JpegSegmentReader.SEGMENT_COM, JpegCommentReader.class);
} catch (JpegProcessingException e) {
throw new TikaException("Can't read JPEG metadata", e);
- } catch (MetadataException e) {
- throw new TikaException("Can't read JPEG metadata", e);
}
}
-
+
+ private void extractMetadataFromSegment(
+ JpegSegmentReader reader, byte marker,
+ Class<? extends MetadataReader> klass) {
+ try {
+ Constructor<? extends MetadataReader> constructor =
+ klass.getConstructor(byte[].class);
+
+ int n = reader.getSegmentCount(marker);
+ for (int i = 0; i < n; i++) {
+ byte[] segment = reader.readSegment(marker, i);
+
+ com.drew.metadata.Metadata metadata =
+ new com.drew.metadata.Metadata();
+ constructor.newInstance(segment).extract(metadata);
+
+ handle(metadata);
+ }
+ } catch (Exception e) {
+ // Unable to read this kind of metadata, so skip
+ }
+ }
+
protected void parseTiff(InputStream stream)
throws IOException, SAXException, TikaException {
try {
@@ -190,7 +221,12 @@ public class ImageMetadataExtractor {
String name = tag.getTagName();
if (!MetadataFields.isMetadataField(name)) {
try {
- String value = tag.getDescription();
+ String value = tag.getDescription().trim();
+ if (Boolean.TRUE.toString().equalsIgnoreCase(value)) {
+ value = Boolean.TRUE.toString();
+ } else if (Boolean.FALSE.toString().equalsIgnoreCase(value)) {
+ value = Boolean.FALSE.toString();
+ }
metadata.set(name, value);
} catch(MetadataException e) {
// Either something's corrupt, or it's a JPEG tag
Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/image/ImageParser.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/image/ImageParser.java?rev=1164617&r1=1164616&r2=1164617&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/image/ImageParser.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/image/ImageParser.java Fri Sep 2 16:39:02 2011
@@ -152,15 +152,17 @@ public class ImageParser extends Abstrac
int length = map.getLength();
if (length == 1) {
- metadata.add(parents, map.item(0).getNodeValue());
+ metadata.add(parents, normalize(map.item(0).getNodeValue()));
} else if (length > 1) {
- StringBuffer value = new StringBuffer();
+ StringBuilder value = new StringBuilder();
for (int i = 0; i < length; i++) {
if (i > 0) {
value.append(", ");
}
Node attr = map.item(i);
- value.append(attr.getNodeName()).append("=").append(attr.getNodeValue());
+ value.append(attr.getNodeName());
+ value.append("=");
+ value.append(normalize(attr.getNodeValue()));
}
metadata.add(parents, value.toString());
}
@@ -174,4 +176,18 @@ public class ImageParser extends Abstrac
}
}
+ private static String normalize(String value) {
+ if (value != null) {
+ value = value.trim();
+ } else {
+ value = "";
+ }
+ if (Boolean.TRUE.toString().equalsIgnoreCase(value)) {
+ return Boolean.TRUE.toString();
+ } else if (Boolean.FALSE.toString().equalsIgnoreCase(value)) {
+ return Boolean.FALSE.toString();
+ }
+ return value;
+ }
+
}
Modified: tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/image/ImageParserTest.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/image/ImageParserTest.java?rev=1164617&r1=1164616&r2=1164617&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/image/ImageParserTest.java (original)
+++ tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/image/ImageParserTest.java Fri Sep 2 16:39:02 2011
@@ -38,7 +38,7 @@ public class ImageParserTest extends Tes
assertEquals("75", metadata.get("height"));
assertEquals("100", metadata.get("width"));
- assertEquals("8 8 8 ", metadata.get("Data BitsPerSample"));
+ assertEquals("8 8 8", metadata.get("Data BitsPerSample"));
assertEquals("1.0", metadata.get("Dimension PixelAspectRatio"));
assertEquals("0", metadata.get("Dimension VerticalPhysicalPixelSpacing"));
assertEquals("0", metadata.get("Dimension HorizontalPhysicalPixelSpacing"));
@@ -59,7 +59,7 @@ public class ImageParserTest extends Tes
assertEquals("75", metadata.get("height"));
assertEquals("100", metadata.get("width"));
- assertEquals("TRUE", metadata.get("Compression Lossless"));
+ assertEquals("true", metadata.get("Compression Lossless"));
assertEquals("Normal", metadata.get("Dimension ImageOrientation"));
assertEquals("lzw", metadata.get("Compression CompressionTypeName"));
assertEquals("0", metadata.get("Dimension HorizontalPixelOffset"));
@@ -70,7 +70,7 @@ public class ImageParserTest extends Tes
assertEquals("RGB", metadata.get("Chroma ColorSpaceType"));
assertEquals("Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file distributed with this work for additional information regarding copyright ownership.", metadata.get("CommentExtensions CommentExtension"));
assertEquals("value=Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file distributed with this work for additional information regarding copyright ownership., encoding=ISO-8859-1, compression=none", metadata.get("Text TextEntry"));
- assertEquals("TRUE", metadata.get("Chroma BlackIsZero"));
+ assertEquals("true", metadata.get("Chroma BlackIsZero"));
assertEquals("disposalMethod=none, userInputFlag=false, transparentColorFlag=false, delayTime=0, transparentColorIndex=0", metadata.get("GraphicControlExtension"));
assertEquals("0", metadata.get("Dimension VerticalPixelOffset"));
assertEquals("image/gif", metadata.get("Content-Type"));