You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ju...@apache.org on 2010/04/14 11:38:59 UTC
svn commit: r933894 - in /lucene/tika/trunk/tika-parsers/src:
main/java/org/apache/tika/parser/image/ main/resources/META-INF/services/
test/java/org/apache/tika/parser/image/
Author: jukka
Date: Wed Apr 14 09:38:59 2010
New Revision: 933894
URL: http://svn.apache.org/viewvc?rev=933894&view=rev
Log:
TIKA-92: Image metadata extraction
Patch by Dmitry Kuzmenko.
Added:
lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/image/TiffExtractor.java (with props)
lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/image/TiffParser.java (with props)
lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/image/TiffParserTest.java (with props)
Modified:
lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/image/ImageParser.java
lucene/tika/trunk/tika-parsers/src/main/resources/META-INF/services/org.apache.tika.parser.Parser
Modified: lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/image/ImageParser.java
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/image/ImageParser.java?rev=933894&r1=933893&r2=933894&view=diff
==============================================================================
--- lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/image/ImageParser.java (original)
+++ lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/image/ImageParser.java Wed Apr 14 09:38:59 2010
@@ -48,7 +48,6 @@ public class ImageParser implements Pars
MediaType.image("bmp"),
MediaType.image("gif"),
MediaType.image("png"),
- MediaType.image("tiff"),
MediaType.image("vnd.wap.wbmp"),
MediaType.image("x-icon"),
MediaType.image("x-psd"),
Added: lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/image/TiffExtractor.java
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/image/TiffExtractor.java?rev=933894&view=auto
==============================================================================
--- lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/image/TiffExtractor.java (added)
+++ lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/image/TiffExtractor.java Wed Apr 14 09:38:59 2010
@@ -0,0 +1,64 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.image;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Iterator;
+
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.metadata.Metadata;
+import org.xml.sax.SAXException;
+
+import com.drew.imaging.tiff.TiffMetadataReader;
+import com.drew.imaging.tiff.TiffProcessingException;
+import com.drew.metadata.Directory;
+import com.drew.metadata.MetadataException;
+import com.drew.metadata.Tag;
+
+/**
+ * Copies the tags found in a TIFF stream into a Tika {@link Metadata}
+ * object, one entry per tag.
+ */
+class TiffExtractor {
+
+    /** Target object that receives every tag name / description pair. */
+    private final Metadata metadata;
+
+    /**
+     * @param metadata the metadata object that {@link #parse(InputStream)}
+     *                 writes the extracted tags to
+     */
+    public TiffExtractor(Metadata metadata) {
+        this.metadata = metadata;
+    }
+
+    /**
+     * Reads the TIFF metadata from the given stream and, for every tag of
+     * every directory, sets the tag name to the tag description in the
+     * target metadata.
+     *
+     * @param stream the stream handed to the TIFF metadata reader
+     * @throws TikaException wrapping any {@link TiffProcessingException}
+     *                       or {@link MetadataException} raised while the
+     *                       tags are being read
+     */
+    public void parse(InputStream stream)
+            throws IOException, SAXException, TikaException {
+        try {
+            com.drew.metadata.Metadata source =
+                    TiffMetadataReader.readMetadata(stream);
+
+            for (Iterator<?> dirs = source.getDirectoryIterator(); dirs.hasNext();) {
+                Directory dir = (Directory) dirs.next();
+
+                for (Iterator<?> entries = dir.getTagIterator(); entries.hasNext();) {
+                    Tag entry = (Tag) entries.next();
+                    String name = entry.getTagName();
+                    String description = entry.getDescription();
+                    metadata.set(name, description);
+                }
+            }
+        } catch (TiffProcessingException e) {
+            throw new TikaException("Can't read TIFF metadata", e);
+        } catch (MetadataException e) {
+            throw new TikaException("Can't read TIFF metadata", e);
+        }
+    }
+
+}
Propchange: lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/image/TiffExtractor.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/image/TiffParser.java
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/image/TiffParser.java?rev=933894&view=auto
==============================================================================
--- lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/image/TiffParser.java (added)
+++ lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/image/TiffParser.java Wed Apr 14 09:38:59 2010
@@ -0,0 +1,68 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.image;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Collections;
+import java.util.Set;
+
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.mime.MediaType;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.parser.Parser;
+import org.apache.tika.sax.XHTMLContentHandler;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.SAXException;
+
+/**
+ * Parser registered for the <code>image/tiff</code> media type. It fills
+ * the supplied {@link Metadata} through {@link TiffExtractor} and writes
+ * no body content of its own to the content handler.
+ */
+public class TiffParser implements Parser {
+
+    /** The single media type handled by this parser: image/tiff. */
+    private static final Set<MediaType> SUPPORTED_TYPES =
+            Collections.singleton(MediaType.image("tiff"));
+
+    /**
+     * @return the constant set containing only the image/tiff media type
+     */
+    public Set<MediaType> getSupportedTypes(ParseContext context) {
+        return SUPPORTED_TYPES;
+    }
+
+    /**
+     * Delegates to the four-argument variant with a fresh
+     * {@link ParseContext}.
+     *
+     * @deprecated This method will be removed in Apache Tika 1.0.
+     */
+    public void parse(
+            InputStream stream, ContentHandler handler, Metadata metadata)
+            throws IOException, SAXException, TikaException {
+        ParseContext defaultContext = new ParseContext();
+        parse(stream, handler, metadata, defaultContext);
+    }
+
+    /**
+     * Extracts the TIFF tags into <code>metadata</code>, drops the entries
+     * whose name starts with "Unknown tag", then starts and immediately
+     * ends the XHTML document on the given handler.
+     */
+    public void parse(
+            InputStream stream, ContentHandler handler,
+            Metadata metadata, ParseContext context)
+            throws IOException, SAXException, TikaException {
+        TiffExtractor extractor = new TiffExtractor(metadata);
+        extractor.parse(stream);
+
+        removeUnknownTags(metadata);
+
+        XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
+        xhtml.startDocument();
+        xhtml.endDocument();
+    }
+
+    /** Removes every metadata entry whose name begins with "Unknown tag". */
+    private static void removeUnknownTags(Metadata metadata) {
+        for (String name : metadata.names()) {
+            if (!name.startsWith("Unknown tag")) {
+                continue;
+            }
+            metadata.remove(name);
+        }
+    }
+
+}
Propchange: lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/image/TiffParser.java
------------------------------------------------------------------------------
svn:eol-style = native
Modified: lucene/tika/trunk/tika-parsers/src/main/resources/META-INF/services/org.apache.tika.parser.Parser
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/tika-parsers/src/main/resources/META-INF/services/org.apache.tika.parser.Parser?rev=933894&r1=933893&r2=933894&view=diff
==============================================================================
--- lucene/tika/trunk/tika-parsers/src/main/resources/META-INF/services/org.apache.tika.parser.Parser (original)
+++ lucene/tika/trunk/tika-parsers/src/main/resources/META-INF/services/org.apache.tika.parser.Parser Wed Apr 14 09:38:59 2010
@@ -19,6 +19,7 @@ org.apache.tika.parser.audio.MidiParser
org.apache.tika.parser.epub.EpubParser
org.apache.tika.parser.html.HtmlParser
org.apache.tika.parser.image.ImageParser
+org.apache.tika.parser.image.TiffParser
org.apache.tika.parser.jpeg.JpegParser
org.apache.tika.parser.mbox.MboxParser
org.apache.tika.parser.microsoft.OfficeParser
Added: lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/image/TiffParserTest.java
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/image/TiffParserTest.java?rev=933894&view=auto
==============================================================================
--- lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/image/TiffParserTest.java (added)
+++ lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/image/TiffParserTest.java Wed Apr 14 09:38:59 2010
@@ -0,0 +1,42 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.image;
+
+import junit.framework.TestCase;
+import org.apache.tika.parser.Parser;
+import org.apache.tika.parser.image.TiffParser;
+import org.apache.tika.metadata.Metadata;
+import org.xml.sax.helpers.DefaultHandler;
+
+import java.io.InputStream;
+
+/**
+ * Checks that {@link TiffParser} exposes the "Image Description" tag of
+ * the bundled test TIFF through the Tika metadata object.
+ */
+public class TiffParserTest extends TestCase {
+    private final Parser parser = new TiffParser();
+
+    /**
+     * Parses /test-documents/testTIFF.tif and compares the
+     * "Image Description" metadata value with the expected text.
+     */
+    public void testTIFF() throws Exception {
+        Metadata metadata = new Metadata();
+        metadata.set(Metadata.CONTENT_TYPE, "image/tiff");
+        InputStream stream =
+                getClass().getResourceAsStream("/test-documents/testTIFF.tif");
+        // getResourceAsStream returns null when the resource is missing;
+        // fail here with a clear message instead of deep inside the parser.
+        assertNotNull(
+                "Test resource /test-documents/testTIFF.tif not found", stream);
+        try {
+            parser.parse(stream, new DefaultHandler(), metadata);
+        } finally {
+            // Release the resource stream even when parsing fails.
+            stream.close();
+        }
+
+        assertEquals("Licensed to the Apache Software Foundation (ASF) under one or " +
+                "more contributor license agreements. See the NOTICE file " +
+                "distributed with this work for additional information regarding " +
+                "copyright ownership.", metadata.get("Image Description"));
+    }
+}
Propchange: lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/image/TiffParserTest.java
------------------------------------------------------------------------------
svn:eol-style = native