You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ju...@apache.org on 2010/04/14 11:34:55 UTC
svn commit: r933893 - in /lucene/tika/trunk/tika-parsers/src:
main/java/org/apache/tika/parser/image/ImageParser.java
test/java/org/apache/tika/parser/image/ImageParserTest.java
Author: jukka
Date: Wed Apr 14 09:34:54 2010
New Revision: 933893
URL: http://svn.apache.org/viewvc?rev=933893&view=rev
Log:
TIKA-92: Image metadata extraction
Patch by Dmitry Kuzmenko.
Modified:
lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/image/ImageParser.java
lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/image/ImageParserTest.java
Modified: lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/image/ImageParser.java
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/image/ImageParser.java?rev=933893&r1=933892&r2=933893&view=diff
==============================================================================
--- lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/image/ImageParser.java (original)
+++ lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/image/ImageParser.java Wed Apr 14 09:34:54 2010
@@ -16,7 +16,7 @@
*/
package org.apache.tika.parser.image;
- import java.io.IOException;
+import java.io.IOException;
import java.io.InputStream;
import java.util.Arrays;
import java.util.Collections;
@@ -27,6 +27,7 @@ import java.util.Set;
import javax.imageio.IIOException;
import javax.imageio.ImageIO;
import javax.imageio.ImageReader;
+import javax.imageio.metadata.IIOMetadata;
import org.apache.tika.exception.TikaException;
import org.apache.tika.io.CloseShieldInputStream;
@@ -35,6 +36,8 @@ import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
import org.apache.tika.sax.XHTMLContentHandler;
+import org.w3c.dom.NamedNodeMap;
+import org.w3c.dom.Node;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
@@ -70,6 +73,9 @@ public class ImageParser implements Pars
new CloseShieldInputStream(stream)));
metadata.set("height", Integer.toString(reader.getHeight(0)));
metadata.set("width", Integer.toString(reader.getWidth(0)));
+
+ loadMetadata(reader.getImageMetadata(0), metadata);
+
reader.dispose();
}
} catch (IIOException e) {
@@ -91,4 +97,51 @@ public class ImageParser implements Pars
parse(stream, handler, metadata, new ParseContext());
}
+    /**
+     * Copies the contents of every metadata format tree exposed by the
+     * given image metadata into the given Tika metadata object.
+     *
+     * @param imageMetadata metadata obtained from the image reader; may be
+     *        <code>null</code>, in which case nothing is added
+     * @param metadata target object that receives the extracted values
+     */
+    private static void loadMetadata(IIOMetadata imageMetadata, Metadata metadata) {
+        // ImageReader.getImageMetadata() may return null when the reader
+        // has no metadata to offer, so guard against it before dereferencing.
+        if (imageMetadata == null) {
+            return;
+        }
+        String[] names = imageMetadata.getMetadataFormatNames();
+        if (names == null) {
+            return;
+        }
+        for (String name : names) {
+            // The root node name is not included in the generated keys.
+            loadNode(metadata, imageMetadata.getAsTree(name), "", false);
+        }
+    }
+
+    /**
+     * Recursively records the attributes of the given DOM node and of all
+     * its descendants in the given metadata object.
+     * <p>
+     * The metadata key is the space-separated chain of node names leading
+     * to the node. A node with exactly one attribute is stored as that
+     * attribute's value; a node with several attributes is stored as a
+     * single <code>name=value, name=value</code> string. Nodes without
+     * attributes add nothing themselves, but their children are still
+     * visited.
+     *
+     * @param metadata target object that receives the extracted values
+     * @param node node to process
+     * @param parents space-separated names of the ancestor nodes that are
+     *        part of the key, or an empty string
+     * @param addThisNodeName whether the name of <code>node</code> is
+     *        appended to <code>parents</code> to form the key
+     */
+    private static void loadNode(
+            Metadata metadata, Node node, String parents,
+            boolean addThisNodeName) {
+        if (addThisNodeName) {
+            if (parents.length() > 0) {
+                parents += " ";
+            }
+            parents += node.getNodeName();
+        }
+
+        NamedNodeMap map = node.getAttributes();
+        if (map != null) {
+            int length = map.getLength();
+            if (length == 1) {
+                // A lone attribute is stored as a bare value, without its name.
+                metadata.add(parents, map.item(0).getNodeValue());
+            } else if (length > 1) {
+                // StringBuilder is sufficient: the buffer never leaves this method.
+                StringBuilder value = new StringBuilder();
+                for (int i = 0; i < length; i++) {
+                    if (i > 0) {
+                        value.append(", ");
+                    }
+                    Node attr = map.item(i);
+                    value.append(attr.getNodeName()).append("=").append(attr.getNodeValue());
+                }
+                metadata.add(parents, value.toString());
+            }
+        }
+
+        // Descend into the children; each child contributes its own name to the key.
+        Node child = node.getFirstChild();
+        while (child != null) {
+            loadNode(metadata, child, parents, true);
+            child = child.getNextSibling();
+        }
+    }
+
}
Modified: lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/image/ImageParserTest.java
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/image/ImageParserTest.java?rev=933893&r1=933892&r2=933893&view=diff
==============================================================================
--- lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/image/ImageParserTest.java (original)
+++ lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/image/ImageParserTest.java Wed Apr 14 09:34:54 2010
@@ -37,6 +37,12 @@ public class ImageParserTest extends Tes
assertEquals("75", metadata.get("height"));
assertEquals("100", metadata.get("width"));
+ assertEquals("8 8 8 ", metadata.get("Data BitsPerSample"));
+ assertEquals("1.0", metadata.get("Dimension PixelAspectRatio"));
+ assertEquals("0", metadata.get("Dimension VerticalPhysicalPixelSpacing"));
+ assertEquals("0", metadata.get("Dimension HorizontalPhysicalPixelSpacing"));
+ assertEquals("BI_RGB", metadata.get("Compression CompressionTypeName"));
+ assertEquals("image/bmp", metadata.get("Content-Type"));
}
public void testGIF() throws Exception {
@@ -48,6 +54,21 @@ public class ImageParserTest extends Tes
assertEquals("75", metadata.get("height"));
assertEquals("100", metadata.get("width"));
+ assertEquals("TRUE", metadata.get("Compression Lossless"));
+ assertEquals("Normal", metadata.get("Dimension ImageOrientation"));
+ assertEquals("lzw", metadata.get("Compression CompressionTypeName"));
+ assertEquals("0", metadata.get("Dimension HorizontalPixelOffset"));
+ assertEquals("imageLeftPosition=0, imageTopPosition=0, imageWidth=100, imageHeight=75, interlaceFlag=false", metadata.get("ImageDescriptor"));
+ assertEquals("Index", metadata.get("Data SampleFormat"));
+ assertEquals("3", metadata.get("Chroma NumChannels"));
+ assertEquals("1", metadata.get("Compression NumProgressiveScans"));
+ assertEquals("RGB", metadata.get("Chroma ColorSpaceType"));
+ assertEquals("Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file distributed with this work for additional information regarding copyright ownership.", metadata.get("CommentExtensions CommentExtension"));
+ assertEquals("value=Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file distributed with this work for additional information regarding copyright ownership., encoding=ISO-8859-1, compression=none", metadata.get("Text TextEntry"));
+ assertEquals("TRUE", metadata.get("Chroma BlackIsZero"));
+ assertEquals("disposalMethod=none, userInputFlag=false, transparentColorFlag=false, delayTime=0, transparentColorIndex=0", metadata.get("GraphicControlExtension"));
+ assertEquals("0", metadata.get("Dimension VerticalPixelOffset"));
+ assertEquals("image/gif", metadata.get("Content-Type"));
}
public void testJPEG() throws Exception {
@@ -59,6 +80,26 @@ public class ImageParserTest extends Tes
assertEquals("75", metadata.get("height"));
assertEquals("100", metadata.get("width"));
+ assertEquals("0.35277778", metadata.get("Dimension VerticalPixelSize"));
+ assertEquals("false", metadata.get("Compression Lossless"));
+ assertEquals("class=0, htableId=0", metadata.get("markerSequence dht dhtable"));
+ assertEquals("majorVersion=1, minorVersion=1, resUnits=1, Xdensity=72, Ydensity=72, thumbWidth=0, thumbHeight=0", metadata.get("JPEGvariety app0JFIF"));
+ assertEquals("225", metadata.get("markerSequence unknown"));
+ assertEquals("componentSelector=1, dcHuffTable=0, acHuffTable=0", metadata.get("markerSequence sos scanComponentSpec"));
+ assertEquals("normal", metadata.get("Dimension ImageOrientation"));
+ assertEquals("1.0", metadata.get("Dimension PixelAspectRatio"));
+ assertEquals("elementPrecision=0, qtableId=0", metadata.get("markerSequence dqt dqtable"));
+ assertEquals("numScanComponents=3, startSpectralSelection=0, endSpectralSelection=63, approxHigh=0, approxLow=0", metadata.get("markerSequence sos"));
+ assertEquals("componentId=1, HsamplingFactor=1, VsamplingFactor=1, QtableSelector=0", metadata.get("markerSequence sof componentSpec"));
+ assertEquals("JPEG", metadata.get("Compression CompressionTypeName"));
+ assertEquals("0.35277778", metadata.get("Dimension HorizontalPixelSize"));
+ assertEquals("Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file distributed with this work for additional information regarding copyright ownership.", metadata.get("markerSequence com"));
+ assertEquals("3", metadata.get("Chroma NumChannels"));
+ assertEquals("1", metadata.get("Compression NumProgressiveScans"));
+ assertEquals("YCbCr", metadata.get("Chroma ColorSpaceType"));
+ assertEquals("keyword=comment, value=Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file distributed with this work for additional information regarding copyright ownership.", metadata.get("Text TextEntry"));
+ assertEquals("image/jpeg", metadata.get("Content-Type"));
+ assertEquals("process=0, samplePrecision=8, numLines=75, samplesPerLine=100, numFrameComponents=3", metadata.get("markerSequence sof"));
}
public void testPNG() throws Exception {
@@ -70,6 +111,28 @@ public class ImageParserTest extends Tes
assertEquals("75", metadata.get("height"));
assertEquals("100", metadata.get("width"));
+ assertEquals("0.35273367", metadata.get("Dimension VerticalPixelSize"));
+ assertEquals("8 8 8", metadata.get("Data BitsPerSample"));
+ assertEquals("Perceptual", metadata.get("sRGB"));
+ assertEquals("true", metadata.get("Compression Lossless"));
+ assertEquals("year=2008, month=5, day=6, hour=6, minute=18, second=47", metadata.get("tIME"));
+ assertEquals("Normal", metadata.get("Dimension ImageOrientation"));
+ assertEquals("1.0", metadata.get("Dimension PixelAspectRatio"));
+ assertEquals("keyword=Comment, value=Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file distributed with this work for additional information regarding copyright ownership.", metadata.get("tEXt tEXtEntry"));
+ assertEquals("deflate", metadata.get("Compression CompressionTypeName"));
+ assertEquals("UnsignedIntegral", metadata.get("Data SampleFormat"));
+ assertEquals("0.35273367", metadata.get("Dimension HorizontalPixelSize"));
+ assertEquals("none", metadata.get("Transparency Alpha"));
+ assertEquals("pixelsPerUnitXAxis=2835, pixelsPerUnitYAxis=2835, unitSpecifier=meter", metadata.get("pHYs"));
+ assertEquals("3", metadata.get("Chroma NumChannels"));
+ assertEquals("1", metadata.get("Compression NumProgressiveScans"));
+ assertEquals("RGB", metadata.get("Chroma ColorSpaceType"));
+ assertEquals("keyword=Comment, value=Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file distributed with this work for additional information regarding copyright ownership., encoding=ISO-8859-1, compression=none", metadata.get("Text TextEntry"));
+ assertEquals("PixelInterleaved", metadata.get("Data PlanarConfiguration"));
+ assertEquals("width=100, height=75, bitDepth=8, colorType=RGB, compressionMethod=deflate, filterMethod=adaptive, interlaceMethod=none", metadata.get("IHDR"));
+ assertEquals("true", metadata.get("Chroma BlackIsZero"));
+ assertEquals("year=2008, month=5, day=6, hour=6, minute=18, second=47", metadata.get("Document ImageModificationTime"));
+ assertEquals("image/png", metadata.get("Content-Type"));
}
// TODO: Add TIFF support