You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2016/06/29 11:11:40 UTC
[34/39] tika git commit: Convert new lines from windows to unix
http://git-wip-us.apache.org/repos/asf/tika/blob/c7a6bcac/tika-parser-modules/tika-parser-multimedia-module/src/main/java/org/apache/tika/parser/audio/AudioParser.java
----------------------------------------------------------------------
diff --git a/tika-parser-modules/tika-parser-multimedia-module/src/main/java/org/apache/tika/parser/audio/AudioParser.java b/tika-parser-modules/tika-parser-multimedia-module/src/main/java/org/apache/tika/parser/audio/AudioParser.java
index 16dd37f..a6c2e9d 100644
--- a/tika-parser-modules/tika-parser-multimedia-module/src/main/java/org/apache/tika/parser/audio/AudioParser.java
+++ b/tika-parser-modules/tika-parser-multimedia-module/src/main/java/org/apache/tika/parser/audio/AudioParser.java
@@ -1,139 +1,139 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.parser.audio;
-
-import java.io.BufferedInputStream;
-import java.io.IOException;
-import java.io.InputStream;
-import java.util.Arrays;
-import java.util.Collections;
-import java.util.HashSet;
-import java.util.Map;
-import java.util.Map.Entry;
-import java.util.Set;
-
-import javax.sound.sampled.AudioFileFormat;
-import javax.sound.sampled.AudioFileFormat.Type;
-import javax.sound.sampled.AudioFormat;
-import javax.sound.sampled.AudioSystem;
-import javax.sound.sampled.UnsupportedAudioFileException;
-
-import org.apache.tika.exception.TikaException;
-import org.apache.tika.metadata.Metadata;
-import org.apache.tika.metadata.XMPDM;
-import org.apache.tika.mime.MediaType;
-import org.apache.tika.parser.AbstractParser;
-import org.apache.tika.parser.ParseContext;
-import org.apache.tika.sax.XHTMLContentHandler;
-import org.xml.sax.ContentHandler;
-import org.xml.sax.SAXException;
-
-public class AudioParser extends AbstractParser {
-
- /** Serial version UID */
- private static final long serialVersionUID = -6015684081240882695L;
-
- private static final Set<MediaType> SUPPORTED_TYPES =
- Collections.unmodifiableSet(new HashSet<MediaType>(Arrays.asList(
- MediaType.audio("basic"),
- MediaType.audio("x-wav"),
- MediaType.audio("x-aiff"))));
-
- public Set<MediaType> getSupportedTypes(ParseContext context) {
- return SUPPORTED_TYPES;
- }
-
- public void parse(
- InputStream stream, ContentHandler handler,
- Metadata metadata, ParseContext context)
- throws IOException, SAXException, TikaException {
- // AudioSystem expects the stream to support the mark feature
- if (!stream.markSupported()) {
- stream = new BufferedInputStream(stream);
- }
- try {
- AudioFileFormat fileFormat = AudioSystem.getAudioFileFormat(stream);
- Type type = fileFormat.getType();
- if (type == Type.AIFC || type == Type.AIFF) {
- metadata.set(Metadata.CONTENT_TYPE, "audio/x-aiff");
- } else if (type == Type.AU || type == Type.SND) {
- metadata.set(Metadata.CONTENT_TYPE, "audio/basic");
- } else if (type == Type.WAVE) {
- metadata.set(Metadata.CONTENT_TYPE, "audio/x-wav");
- }
-
- AudioFormat audioFormat = fileFormat.getFormat();
- int channels = audioFormat.getChannels();
- if (channels != AudioSystem.NOT_SPECIFIED) {
- metadata.set("channels", String.valueOf(channels));
- // TODO: Use XMPDM.TRACKS? (see also frame rate in AudioFormat)
- }
- float rate = audioFormat.getSampleRate();
- if (rate != AudioSystem.NOT_SPECIFIED) {
- metadata.set("samplerate", String.valueOf(rate));
- metadata.set(
- XMPDM.AUDIO_SAMPLE_RATE,
- Integer.toString((int) rate));
- }
- int bits = audioFormat.getSampleSizeInBits();
- if (bits != AudioSystem.NOT_SPECIFIED) {
- metadata.set("bits", String.valueOf(bits));
- if (bits == 8) {
- metadata.set(XMPDM.AUDIO_SAMPLE_TYPE, "8Int");
- } else if (bits == 16) {
- metadata.set(XMPDM.AUDIO_SAMPLE_TYPE, "16Int");
- } else if (bits == 32) {
- metadata.set(XMPDM.AUDIO_SAMPLE_TYPE, "32Int");
- }
- }
- metadata.set("encoding", audioFormat.getEncoding().toString());
-
- // Javadoc suggests that some of the following properties might
- // be available, but I had no success in finding any:
-
- // "duration" Long playback duration of the file in microseconds
- // "author" String name of the author of this file
- // "title" String title of this file
- // "copyright" String copyright message
- // "date" Date date of the recording or release
- // "comment" String an arbitrary text
-
- addMetadata(metadata, fileFormat.properties());
- addMetadata(metadata, audioFormat.properties());
- } catch (UnsupportedAudioFileException e) {
- // There is no way to know whether this exception was
- // caused by the document being corrupted or by the format
- // just being unsupported. So we do nothing.
- }
-
- XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
- xhtml.startDocument();
- xhtml.endDocument();
- }
-
- private void addMetadata(Metadata metadata, Map<String, Object> properties) {
- if (properties != null) {
- for (Entry<String, Object> entry : properties.entrySet()) {
- Object value = entry.getValue();
- if (value != null) {
- metadata.set(entry.getKey(), value.toString());
- }
- }
- }
- }
-
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.audio;
+
+import java.io.BufferedInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Set;
+
+import javax.sound.sampled.AudioFileFormat;
+import javax.sound.sampled.AudioFileFormat.Type;
+import javax.sound.sampled.AudioFormat;
+import javax.sound.sampled.AudioSystem;
+import javax.sound.sampled.UnsupportedAudioFileException;
+
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.XMPDM;
+import org.apache.tika.mime.MediaType;
+import org.apache.tika.parser.AbstractParser;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.sax.XHTMLContentHandler;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.SAXException;
+
+/**
+ * Parser for sampled audio files that the Java Sound API can identify.
+ * Only metadata is extracted; the XHTML output is an empty document.
+ */
+public class AudioParser extends AbstractParser {
+
+    /** Serial version UID */
+    private static final long serialVersionUID = -6015684081240882695L;
+
+    /** Media types this parser declares support for. */
+    private static final Set<MediaType> SUPPORTED_TYPES;
+
+    static {
+        Set<MediaType> types = new HashSet<MediaType>();
+        types.add(MediaType.audio("basic"));
+        types.add(MediaType.audio("x-wav"));
+        types.add(MediaType.audio("x-aiff"));
+        SUPPORTED_TYPES = Collections.unmodifiableSet(types);
+    }
+
+    /**
+     * Returns the fixed, unmodifiable set of supported audio media types.
+     *
+     * @param context parse context (not consulted)
+     * @return the supported media types
+     */
+    public Set<MediaType> getSupportedTypes(ParseContext context) {
+        return SUPPORTED_TYPES;
+    }
+
+    /**
+     * Asks {@link AudioSystem} for the file format of the stream and copies
+     * the container type, channel count, sample rate, sample size, encoding
+     * and any extra format properties into the metadata. An empty XHTML
+     * document is always emitted, even when the format is not recognised.
+     *
+     * @param stream   the audio data; wrapped in a buffer if it lacks mark support
+     * @param handler  receiver of the (empty) XHTML document
+     * @param metadata receiver of the extracted audio properties
+     * @param context  parse context (not consulted)
+     */
+    public void parse(
+            InputStream stream, ContentHandler handler,
+            Metadata metadata, ParseContext context)
+            throws IOException, SAXException, TikaException {
+        // AudioSystem expects the stream to support the mark feature
+        InputStream markable = stream.markSupported()
+                ? stream
+                : new BufferedInputStream(stream);
+
+        try {
+            AudioFileFormat fileFormat = AudioSystem.getAudioFileFormat(markable);
+            recordContentType(fileFormat.getType(), metadata);
+
+            AudioFormat audioFormat = fileFormat.getFormat();
+            recordSampleDetails(audioFormat, metadata);
+
+            // The Javadoc suggests properties such as "duration", "author",
+            // "title", "copyright", "date" and "comment" may be present in
+            // these maps, though the original author never found any.
+            addMetadata(metadata, fileFormat.properties());
+            addMetadata(metadata, audioFormat.properties());
+        } catch (UnsupportedAudioFileException e) {
+            // A corrupted document and a merely unsupported format are
+            // indistinguishable here, so this is deliberately ignored.
+        }
+
+        XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
+        xhtml.startDocument();
+        xhtml.endDocument();
+    }
+
+    /**
+     * Maps the Java Sound container type onto a content type; types other
+     * than AIFC/AIFF, AU/SND and WAVE leave the metadata untouched.
+     */
+    private static void recordContentType(Type type, Metadata metadata) {
+        String contentType = null;
+        if (Type.AIFC == type || Type.AIFF == type) {
+            contentType = "audio/x-aiff";
+        } else if (Type.AU == type || Type.SND == type) {
+            contentType = "audio/basic";
+        } else if (Type.WAVE == type) {
+            contentType = "audio/x-wav";
+        }
+        if (contentType != null) {
+            metadata.set(Metadata.CONTENT_TYPE, contentType);
+        }
+    }
+
+    /**
+     * Records channels, sample rate, sample size and encoding, skipping any
+     * numeric value the format reports as {@link AudioSystem#NOT_SPECIFIED}.
+     */
+    private static void recordSampleDetails(AudioFormat audioFormat, Metadata metadata) {
+        int channels = audioFormat.getChannels();
+        if (channels != AudioSystem.NOT_SPECIFIED) {
+            // TODO: Use XMPDM.TRACKS? (see also frame rate in AudioFormat)
+            metadata.set("channels", Integer.toString(channels));
+        }
+
+        float rate = audioFormat.getSampleRate();
+        if (rate != AudioSystem.NOT_SPECIFIED) {
+            metadata.set("samplerate", Float.toString(rate));
+            metadata.set(XMPDM.AUDIO_SAMPLE_RATE, String.valueOf((int) rate));
+        }
+
+        int bits = audioFormat.getSampleSizeInBits();
+        if (bits != AudioSystem.NOT_SPECIFIED) {
+            metadata.set("bits", Integer.toString(bits));
+            // Only these three sizes are mapped to an XMP sample type
+            switch (bits) {
+                case 8:
+                    metadata.set(XMPDM.AUDIO_SAMPLE_TYPE, "8Int");
+                    break;
+                case 16:
+                    metadata.set(XMPDM.AUDIO_SAMPLE_TYPE, "16Int");
+                    break;
+                case 32:
+                    metadata.set(XMPDM.AUDIO_SAMPLE_TYPE, "32Int");
+                    break;
+                default:
+                    break;
+            }
+        }
+
+        metadata.set("encoding", audioFormat.getEncoding().toString());
+    }
+
+    /**
+     * Copies every non-null property value into the metadata under its own
+     * key, using the value's {@code toString()} form. A null map is ignored.
+     */
+    private void addMetadata(Metadata metadata, Map<String, Object> properties) {
+        if (properties == null) {
+            return;
+        }
+        for (Entry<String, Object> property : properties.entrySet()) {
+            Object propertyValue = property.getValue();
+            if (propertyValue == null) {
+                continue;
+            }
+            metadata.set(property.getKey(), propertyValue.toString());
+        }
+    }
+
+}
http://git-wip-us.apache.org/repos/asf/tika/blob/c7a6bcac/tika-parser-modules/tika-parser-multimedia-module/src/main/java/org/apache/tika/parser/audio/MidiParser.java
----------------------------------------------------------------------
diff --git a/tika-parser-modules/tika-parser-multimedia-module/src/main/java/org/apache/tika/parser/audio/MidiParser.java b/tika-parser-modules/tika-parser-multimedia-module/src/main/java/org/apache/tika/parser/audio/MidiParser.java
index c777287..656d1aa 100644
--- a/tika-parser-modules/tika-parser-multimedia-module/src/main/java/org/apache/tika/parser/audio/MidiParser.java
+++ b/tika-parser-modules/tika-parser-multimedia-module/src/main/java/org/apache/tika/parser/audio/MidiParser.java
@@ -1,121 +1,121 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.parser.audio;
-
-import java.io.BufferedInputStream;
-import java.io.IOException;
-import java.io.InputStream;
-import java.util.Arrays;
-import java.util.Collections;
-import java.util.HashSet;
-import java.util.Set;
-
-import javax.sound.midi.InvalidMidiDataException;
-import javax.sound.midi.MetaMessage;
-import javax.sound.midi.MidiMessage;
-import javax.sound.midi.MidiSystem;
-import javax.sound.midi.Patch;
-import javax.sound.midi.Sequence;
-import javax.sound.midi.Track;
-
-import org.apache.tika.exception.TikaException;
-import org.apache.tika.metadata.Metadata;
-import org.apache.tika.mime.MediaType;
-import org.apache.tika.parser.AbstractParser;
-import org.apache.tika.parser.ParseContext;
-import org.apache.tika.sax.XHTMLContentHandler;
-import org.xml.sax.ContentHandler;
-import org.xml.sax.SAXException;
-
-import static java.nio.charset.StandardCharsets.ISO_8859_1;
-
-public class MidiParser extends AbstractParser {
-
- /** Serial version UID */
- private static final long serialVersionUID = 6343278584336189432L;
-
- private static final Set<MediaType> SUPPORTED_TYPES =
- Collections.unmodifiableSet(new HashSet<MediaType>(Arrays.asList(
- MediaType.application("x-midi"),
- MediaType.audio("midi"))));
-
- public Set<MediaType> getSupportedTypes(ParseContext context) {
- return SUPPORTED_TYPES;
- }
-
- public void parse(
- InputStream stream, ContentHandler handler,
- Metadata metadata, ParseContext context)
- throws IOException, SAXException, TikaException {
- metadata.set(Metadata.CONTENT_TYPE, "audio/midi");
-
- XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
- xhtml.startDocument();
-
- // MidiSystem expects the stream to support the mark feature
- InputStream buffered = new BufferedInputStream(stream);
- try {
- Sequence sequence = MidiSystem.getSequence(buffered);
-
- Track[] tracks = sequence.getTracks();
- metadata.set("tracks", String.valueOf(tracks.length));
- // TODO: Use XMPDM.TRACKS?
-
- Patch[] patches = sequence.getPatchList();
- metadata.set("patches", String.valueOf(patches.length));
-
- float type = sequence.getDivisionType();
- if (type == Sequence.PPQ) {
- metadata.set("divisionType", "PPQ");
- } else if (type == Sequence.SMPTE_24) {
- metadata.set("divisionType", "SMPTE_24");
- } else if (type == Sequence.SMPTE_25) {
- metadata.set("divisionType", "SMPTE_25");
- } else if (type == Sequence.SMPTE_30) {
- metadata.set("divisionType", "SMPTE_30");
- } else if (type == Sequence.SMPTE_30DROP) {
- metadata.set("divisionType", "SMPTE_30DROP");
- } else if (type == Sequence.SMPTE_24) {
- metadata.set("divisionType", String.valueOf(type));
- }
-
- for (Track track : tracks) {
- xhtml.startElement("p");
- for (int i = 0; i < track.size(); i++) {
- MidiMessage message = track.get(i).getMessage();
- if (message instanceof MetaMessage) {
- MetaMessage meta = (MetaMessage) message;
- // Types 1-15 are reserved for text events
- if (meta.getType() >= 1 && meta.getType() <= 15) {
- // FIXME: What's the encoding?
- xhtml.characters(
- new String(meta.getData(), ISO_8859_1));
- }
- }
- }
- xhtml.endElement("p");
- }
- } catch (InvalidMidiDataException ignore) {
- // There is no way to know whether this exception was
- // caused by the document being corrupted or by the format
- // just being unsupported. So we do nothing.
- }
-
- xhtml.endDocument();
- }
-
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.audio;
+
+import java.io.BufferedInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.Set;
+
+import javax.sound.midi.InvalidMidiDataException;
+import javax.sound.midi.MetaMessage;
+import javax.sound.midi.MidiMessage;
+import javax.sound.midi.MidiSystem;
+import javax.sound.midi.Patch;
+import javax.sound.midi.Sequence;
+import javax.sound.midi.Track;
+
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.mime.MediaType;
+import org.apache.tika.parser.AbstractParser;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.sax.XHTMLContentHandler;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.SAXException;
+
+import static java.nio.charset.StandardCharsets.ISO_8859_1;
+
+/**
+ * Parser for MIDI files. Records the track count, patch count and timing
+ * division type as metadata, and emits the text-type meta events of each
+ * track as one XHTML paragraph per track.
+ */
+public class MidiParser extends AbstractParser {
+
+    /** Serial version UID */
+    private static final long serialVersionUID = 6343278584336189432L;
+
+    /** Media types this parser declares support for. */
+    private static final Set<MediaType> SUPPORTED_TYPES =
+        Collections.unmodifiableSet(new HashSet<MediaType>(Arrays.asList(
+                MediaType.application("x-midi"),
+                MediaType.audio("midi"))));
+
+    /**
+     * Returns the fixed, unmodifiable set of supported MIDI media types.
+     *
+     * @param context parse context (not consulted)
+     * @return the supported media types
+     */
+    public Set<MediaType> getSupportedTypes(ParseContext context) {
+        return SUPPORTED_TYPES;
+    }
+
+    /**
+     * Reads the stream as a MIDI sequence via {@link MidiSystem} and reports
+     * its structure. The content type is set to {@code audio/midi} and the
+     * XHTML document is opened before the sequence is read, so both happen
+     * even when the data turns out not to be valid MIDI.
+     *
+     * @param stream   the MIDI data; always wrapped in a buffer for mark support
+     * @param handler  receiver of the XHTML output
+     * @param metadata receiver of the "tracks", "patches" and "divisionType" values
+     * @param context  parse context (not consulted)
+     */
+    public void parse(
+            InputStream stream, ContentHandler handler,
+            Metadata metadata, ParseContext context)
+            throws IOException, SAXException, TikaException {
+        metadata.set(Metadata.CONTENT_TYPE, "audio/midi");
+
+        XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
+        xhtml.startDocument();
+
+        // MidiSystem expects the stream to support the mark feature
+        InputStream buffered = new BufferedInputStream(stream);
+        try {
+            Sequence sequence = MidiSystem.getSequence(buffered);
+
+            Track[] tracks = sequence.getTracks();
+            metadata.set("tracks", String.valueOf(tracks.length));
+            // TODO: Use XMPDM.TRACKS?
+
+            Patch[] patches = sequence.getPatchList();
+            metadata.set("patches", String.valueOf(patches.length));
+
+            metadata.set(
+                    "divisionType",
+                    divisionTypeName(sequence.getDivisionType()));
+
+            for (Track track : tracks) {
+                xhtml.startElement("p");
+                for (int i = 0; i < track.size(); i++) {
+                    MidiMessage message = track.get(i).getMessage();
+                    if (message instanceof MetaMessage) {
+                        MetaMessage meta = (MetaMessage) message;
+                        // Types 1-15 are reserved for text events
+                        if (meta.getType() >= 1 && meta.getType() <= 15) {
+                            // FIXME: What's the encoding?
+                            xhtml.characters(
+                                    new String(meta.getData(), ISO_8859_1));
+                        }
+                    }
+                }
+                xhtml.endElement("p");
+            }
+        } catch (InvalidMidiDataException ignore) {
+            // There is no way to know whether this exception was
+            // caused by the document being corrupted or by the format
+            // just being unsupported. So we do nothing.
+        }
+
+        xhtml.endDocument();
+    }
+
+    /**
+     * Translates a {@link Sequence} division type into its symbolic name.
+     * The final fallback used to be guarded by a second, unreachable
+     * {@code SMPTE_24} test, so unrecognised division types were silently
+     * dropped; they are now reported by their numeric value.
+     *
+     * @param type value returned by {@link Sequence#getDivisionType()}
+     * @return the constant's name, or the number as text if it is not one
+     *         of the division types defined on {@link Sequence}
+     */
+    private static String divisionTypeName(float type) {
+        if (type == Sequence.PPQ) {
+            return "PPQ";
+        } else if (type == Sequence.SMPTE_24) {
+            return "SMPTE_24";
+        } else if (type == Sequence.SMPTE_25) {
+            return "SMPTE_25";
+        } else if (type == Sequence.SMPTE_30) {
+            return "SMPTE_30";
+        } else if (type == Sequence.SMPTE_30DROP) {
+            return "SMPTE_30DROP";
+        } else {
+            return String.valueOf(type);
+        }
+    }
+
+}
http://git-wip-us.apache.org/repos/asf/tika/blob/c7a6bcac/tika-parser-modules/tika-parser-multimedia-module/src/main/java/org/apache/tika/parser/font/TrueTypeParser.java
----------------------------------------------------------------------
diff --git a/tika-parser-modules/tika-parser-multimedia-module/src/main/java/org/apache/tika/parser/font/TrueTypeParser.java b/tika-parser-modules/tika-parser-multimedia-module/src/main/java/org/apache/tika/parser/font/TrueTypeParser.java
index d8a6539..c207e0b 100644
--- a/tika-parser-modules/tika-parser-multimedia-module/src/main/java/org/apache/tika/parser/font/TrueTypeParser.java
+++ b/tika-parser-modules/tika-parser-multimedia-module/src/main/java/org/apache/tika/parser/font/TrueTypeParser.java
@@ -1,111 +1,111 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.parser.font;
-
-import java.io.IOException;
-import java.io.InputStream;
-import java.util.Collections;
-import java.util.Set;
-
-import org.apache.fontbox.ttf.NameRecord;
-import org.apache.fontbox.ttf.NamingTable;
-import org.apache.fontbox.ttf.TTFParser;
-import org.apache.fontbox.ttf.TrueTypeFont;
-import org.apache.tika.exception.TikaException;
-import org.apache.tika.io.TikaInputStream;
-import org.apache.tika.metadata.Metadata;
-import org.apache.tika.metadata.TikaCoreProperties;
-import org.apache.tika.mime.MediaType;
-import org.apache.tika.parser.AbstractParser;
-import org.apache.tika.parser.ParseContext;
-import org.apache.tika.sax.XHTMLContentHandler;
-import org.xml.sax.ContentHandler;
-import org.xml.sax.SAXException;
-
-/**
- * Parser for TrueType font files (TTF).
- */
-public class TrueTypeParser extends AbstractParser {
-
- /** Serial version UID */
- private static final long serialVersionUID = 44788554612243032L;
-
- private static final MediaType TYPE =
- MediaType.application("x-font-ttf");
-
- private static final Set<MediaType> SUPPORTED_TYPES =
- Collections.singleton(TYPE);
-
- public Set<MediaType> getSupportedTypes(ParseContext context) {
- return SUPPORTED_TYPES;
- }
-
- public void parse(
- InputStream stream, ContentHandler handler,
- Metadata metadata, ParseContext context)
- throws IOException, SAXException, TikaException {
- TikaInputStream tis = TikaInputStream.cast(stream);
-
- // Ask FontBox to parse the file for us
- TrueTypeFont font;
- TTFParser parser = new TTFParser();
- if (tis != null && tis.hasFile()) {
- font = parser.parse(tis.getFile());
- } else {
- font = parser.parse(stream);
- }
-
- // Report the details of the font
- metadata.set(Metadata.CONTENT_TYPE, TYPE.toString());
- metadata.set(TikaCoreProperties.CREATED,
- font.getHeader().getCreated());
- metadata.set(TikaCoreProperties.MODIFIED,
- font.getHeader().getModified());
- metadata.set(AdobeFontMetricParser.MET_DOC_VERSION,
- Float.toString(font.getHeader().getVersion()));
-
- // Pull out the naming info
- NamingTable fontNaming = font.getNaming();
- for (NameRecord nr : fontNaming.getNameRecords()) {
- if (nr.getNameId() == NameRecord.NAME_FONT_FAMILY_NAME) {
- metadata.set(AdobeFontMetricParser.MET_FONT_FAMILY_NAME, nr.getString());
- }
- if (nr.getNameId() == NameRecord.NAME_FONT_SUB_FAMILY_NAME) {
- metadata.set(AdobeFontMetricParser.MET_FONT_SUB_FAMILY_NAME, nr.getString());
- }
- if (nr.getNameId() == NameRecord.NAME_FULL_FONT_NAME) {
- metadata.set(AdobeFontMetricParser.MET_FONT_NAME, nr.getString());
- metadata.set(TikaCoreProperties.TITLE, nr.getString());
- }
- if (nr.getNameId() == NameRecord.NAME_POSTSCRIPT_NAME) {
- metadata.set(AdobeFontMetricParser.MET_PS_NAME, nr.getString());
- }
- if (nr.getNameId() == NameRecord.NAME_COPYRIGHT) {
- metadata.set("Copyright", nr.getString());
- }
- if (nr.getNameId() == NameRecord.NAME_TRADEMARK) {
- metadata.set("Trademark", nr.getString());
- }
- }
-
- // For now, we only output metadata, no textual contents
- XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
- xhtml.startDocument();
- xhtml.endDocument();
- }
-
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.font;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Collections;
+import java.util.Set;
+
+import org.apache.fontbox.ttf.NameRecord;
+import org.apache.fontbox.ttf.NamingTable;
+import org.apache.fontbox.ttf.TTFParser;
+import org.apache.fontbox.ttf.TrueTypeFont;
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.io.TikaInputStream;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.TikaCoreProperties;
+import org.apache.tika.mime.MediaType;
+import org.apache.tika.parser.AbstractParser;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.sax.XHTMLContentHandler;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.SAXException;
+
+/**
+ * Parser for TrueType font files (TTF). Uses FontBox to read the font and
+ * reports its header dates, version and naming records as metadata; no
+ * textual content is produced.
+ */
+public class TrueTypeParser extends AbstractParser {
+
+    /** Serial version UID */
+    private static final long serialVersionUID = 44788554612243032L;
+
+    /** The single media type handled by this parser. */
+    private static final MediaType TYPE =
+        MediaType.application("x-font-ttf");
+
+    private static final Set<MediaType> SUPPORTED_TYPES =
+        Collections.singleton(TYPE);
+
+    /**
+     * Returns the singleton set containing the TrueType media type.
+     *
+     * @param context parse context (not consulted)
+     * @return the supported media types
+     */
+    public Set<MediaType> getSupportedTypes(ParseContext context) {
+        return SUPPORTED_TYPES;
+    }
+
+    /**
+     * Parses the font with FontBox and copies its details into the metadata,
+     * then emits an empty XHTML document. The parsed font is always closed
+     * before returning so that the resources FontBox holds for it are
+     * released, including when metadata extraction fails part-way.
+     *
+     * @param stream   the font data; if it is a TikaInputStream that already
+     *                 has a file, FontBox reads that file directly
+     * @param handler  receiver of the (empty) XHTML document
+     * @param metadata receiver of the extracted font properties
+     * @param context  parse context (not consulted)
+     */
+    public void parse(
+            InputStream stream, ContentHandler handler,
+            Metadata metadata, ParseContext context)
+            throws IOException, SAXException, TikaException {
+        TikaInputStream tis = TikaInputStream.cast(stream);
+
+        TrueTypeFont font = null;
+        try {
+            // Ask FontBox to parse the file for us
+            TTFParser parser = new TTFParser();
+            if (tis != null && tis.hasFile()) {
+                font = parser.parse(tis.getFile());
+            } else {
+                font = parser.parse(stream);
+            }
+
+            // Report the details of the font
+            metadata.set(Metadata.CONTENT_TYPE, TYPE.toString());
+            metadata.set(TikaCoreProperties.CREATED,
+                    font.getHeader().getCreated());
+            metadata.set(TikaCoreProperties.MODIFIED,
+                    font.getHeader().getModified());
+            metadata.set(AdobeFontMetricParser.MET_DOC_VERSION,
+                    Float.toString(font.getHeader().getVersion()));
+
+            // Pull out the naming info. Defensive guard: skip this step
+            // rather than fail if FontBox hands back no naming table.
+            NamingTable fontNaming = font.getNaming();
+            if (fontNaming != null) {
+                for (NameRecord nr : fontNaming.getNameRecords()) {
+                    if (nr.getNameId() == NameRecord.NAME_FONT_FAMILY_NAME) {
+                        metadata.set(AdobeFontMetricParser.MET_FONT_FAMILY_NAME, nr.getString());
+                    }
+                    if (nr.getNameId() == NameRecord.NAME_FONT_SUB_FAMILY_NAME) {
+                        metadata.set(AdobeFontMetricParser.MET_FONT_SUB_FAMILY_NAME, nr.getString());
+                    }
+                    if (nr.getNameId() == NameRecord.NAME_FULL_FONT_NAME) {
+                        // The full name doubles as the document title
+                        metadata.set(AdobeFontMetricParser.MET_FONT_NAME, nr.getString());
+                        metadata.set(TikaCoreProperties.TITLE, nr.getString());
+                    }
+                    if (nr.getNameId() == NameRecord.NAME_POSTSCRIPT_NAME) {
+                        metadata.set(AdobeFontMetricParser.MET_PS_NAME, nr.getString());
+                    }
+                    if (nr.getNameId() == NameRecord.NAME_COPYRIGHT) {
+                        metadata.set("Copyright", nr.getString());
+                    }
+                    if (nr.getNameId() == NameRecord.NAME_TRADEMARK) {
+                        metadata.set("Trademark", nr.getString());
+                    }
+                }
+            }
+        } finally {
+            // font stays null if FontBox failed to parse the data
+            if (font != null) {
+                font.close();
+            }
+        }
+
+        // For now, we only output metadata, no textual contents
+        XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
+        xhtml.startDocument();
+        xhtml.endDocument();
+    }
+
+}
http://git-wip-us.apache.org/repos/asf/tika/blob/c7a6bcac/tika-parser-modules/tika-parser-multimedia-module/src/main/java/org/apache/tika/parser/image/ImageMetadataExtractor.java
----------------------------------------------------------------------
diff --git a/tika-parser-modules/tika-parser-multimedia-module/src/main/java/org/apache/tika/parser/image/ImageMetadataExtractor.java b/tika-parser-modules/tika-parser-multimedia-module/src/main/java/org/apache/tika/parser/image/ImageMetadataExtractor.java
index 39044d3..bf29d0b 100644
--- a/tika-parser-modules/tika-parser-multimedia-module/src/main/java/org/apache/tika/parser/image/ImageMetadataExtractor.java
+++ b/tika-parser-modules/tika-parser-multimedia-module/src/main/java/org/apache/tika/parser/image/ImageMetadataExtractor.java
@@ -1,562 +1,562 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.parser.image;
-
-import java.io.ByteArrayInputStream;
-import java.io.File;
-import java.io.IOException;
-import java.io.InputStream;
-import java.text.DecimalFormat;
-import java.text.DecimalFormatSymbols;
-import java.text.SimpleDateFormat;
-import java.util.Date;
-import java.util.Iterator;
-import java.util.Locale;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
-
-import com.drew.imaging.jpeg.JpegMetadataReader;
-import com.drew.imaging.jpeg.JpegProcessingException;
-import com.drew.imaging.riff.RiffProcessingException;
-import com.drew.imaging.tiff.TiffMetadataReader;
-import com.drew.imaging.tiff.TiffProcessingException;
-import com.drew.imaging.webp.WebpMetadataReader;
-import com.drew.lang.ByteArrayReader;
-import com.drew.lang.GeoLocation;
-import com.drew.lang.Rational;
-import com.drew.metadata.Directory;
-import com.drew.metadata.MetadataException;
-import com.drew.metadata.Tag;
-import com.drew.metadata.exif.ExifIFD0Directory;
-import com.drew.metadata.exif.ExifReader;
-import com.drew.metadata.exif.ExifSubIFDDirectory;
-import com.drew.metadata.exif.ExifThumbnailDirectory;
-import com.drew.metadata.exif.GpsDirectory;
-import com.drew.metadata.iptc.IptcDirectory;
-import com.drew.metadata.jpeg.JpegCommentDirectory;
-import com.drew.metadata.jpeg.JpegDirectory;
-import org.apache.commons.io.IOUtils;
-import org.apache.jempbox.xmp.XMPMetadata;
-import org.apache.tika.exception.TikaException;
-import org.apache.tika.metadata.IPTC;
-import org.apache.tika.metadata.Metadata;
-import org.apache.tika.metadata.Property;
-import org.apache.tika.metadata.TikaCoreProperties;
-import org.apache.tika.parser.ParseContext;
-import org.apache.tika.parser.xmp.JempboxExtractor;
-import org.w3c.dom.Document;
-import org.xml.sax.SAXException;
-
-/**
- * Uses the <a href="http://www.drewnoakes.com/code/exif/">Metadata Extractor</a> library
- * to read EXIF and IPTC image metadata and map to Tika fields.
- * <p/>
- * As of 2.4.0 the library supports jpeg and tiff.
- * As of 2.8.0 the library supports webp.
- */
-public class ImageMetadataExtractor {
-
- private static final String GEO_DECIMAL_FORMAT_STRING = "#.######"; // 6 dp seems to be reasonable
- private final Metadata metadata;
- private DirectoryHandler[] handlers;
-
- /**
- * @param metadata to extract to, using default directory handlers
- */
- public ImageMetadataExtractor(Metadata metadata) {
- this(metadata,
- new CopyUnknownFieldsHandler(),
- new JpegCommentHandler(),
- new ExifHandler(),
- new DimensionsHandler(),
- new GeotagHandler(),
- new IptcHandler()
- );
- }
-
- /**
- * @param metadata to extract to
- * @param handlers handlers in order, note that handlers may override values from earlier handlers
- */
- public ImageMetadataExtractor(Metadata metadata, DirectoryHandler... handlers) {
- this.metadata = metadata;
- this.handlers = handlers;
- }
-
- private static String trimPixels(String s) {
- //if height/width appears as "100 pixels", trim " pixels"
- if (s != null) {
- int i = s.lastIndexOf(" pixels");
- s = s.substring(0, i);
- }
- return s;
- }
-
- public void parseJpeg(File file)
- throws IOException, SAXException, TikaException {
- try {
- com.drew.metadata.Metadata jpegMetadata = JpegMetadataReader.readMetadata(file);
- handle(jpegMetadata);
- } catch (JpegProcessingException e) {
- throw new TikaException("Can't read JPEG metadata", e);
- } catch (MetadataException e) {
- throw new TikaException("Can't read JPEG metadata", e);
- }
- }
-
- public void parseTiff(File file)
- throws IOException, SAXException, TikaException {
- try {
- com.drew.metadata.Metadata tiffMetadata = TiffMetadataReader.readMetadata(file);
- handle(tiffMetadata);
- } catch (MetadataException e) {
- throw new TikaException("Can't read TIFF metadata", e);
- } catch (TiffProcessingException e) {
- throw new TikaException("Can't read TIFF metadata", e);
- }
- }
-
- public void parseWebP(File file) throws IOException, TikaException {
-
- try {
- com.drew.metadata.Metadata webPMetadata = new com.drew.metadata.Metadata();
- webPMetadata = WebpMetadataReader.readMetadata(file);
- handle(webPMetadata);
- } catch (IOException e) {
- throw e;
- } catch (RiffProcessingException e) {
- throw new TikaException("Can't process Riff data", e);
- } catch (MetadataException e) {
- throw new TikaException("Can't process Riff data", e);
- }
- }
-
- public void parseRawExif(InputStream stream, int length, boolean needsExifHeader)
- throws IOException, SAXException, TikaException {
- byte[] exif;
- if (needsExifHeader) {
- exif = new byte[length + 6];
- exif[0] = (byte) 'E';
- exif[1] = (byte) 'x';
- exif[2] = (byte) 'i';
- exif[3] = (byte) 'f';
- IOUtils.readFully(stream, exif, 6, length);
- } else {
- exif = new byte[length];
- IOUtils.readFully(stream, exif, 0, length);
- }
- parseRawExif(exif);
- }
-
- public void parseRawExif(byte[] exifData)
- throws IOException, SAXException, TikaException {
- com.drew.metadata.Metadata metadata = new com.drew.metadata.Metadata();
- ExifReader reader = new ExifReader();
- reader.extract(new ByteArrayReader(exifData), metadata, ExifReader.JPEG_SEGMENT_PREAMBLE.length());
-
- try {
- handle(metadata);
- } catch (MetadataException e) {
- throw new TikaException("Can't process the EXIF Data", e);
- }
- }
-
- public void parseRawXMP(byte[] xmpData)
- throws IOException, SAXException, TikaException {
- XMPMetadata xmp = null;
- try (InputStream decoded =
- new ByteArrayInputStream(xmpData)
- ) {
- Document dom = new ParseContext().getDocumentBuilder().parse(decoded);
- if (dom != null) {
- xmp = new XMPMetadata(dom);
- }
- } catch (IOException|SAXException e) {
- //
- }
- if (xmp != null) {
- JempboxExtractor.extractDublinCore(xmp, metadata);
- JempboxExtractor.extractXMPMM(xmp, metadata);
- }
- }
-
- /**
- * Copies extracted tags to tika metadata using registered handlers.
- *
- * @param metadataExtractor Tag directories from a Metadata Extractor "reader"
- * @throws MetadataException This method does not handle exceptions from Metadata Extractor
- */
- protected void handle(com.drew.metadata.Metadata metadataExtractor)
- throws MetadataException {
- handle(metadataExtractor.getDirectories().iterator());
- }
-
- /**
- * Copies extracted tags to tika metadata using registered handlers.
- *
- * @param directories Metadata Extractor {@link com.drew.metadata.Directory} instances.
- * @throws MetadataException This method does not handle exceptions from Metadata Extractor
- */
- protected void handle(Iterator<Directory> directories) throws MetadataException {
- while (directories.hasNext()) {
- Directory directory = directories.next();
- for (DirectoryHandler handler : handlers) {
- if (handler.supports(directory.getClass())) {
- handler.handle(directory, metadata);
- }
- }
- }
- }
-
- /**
- * Reads one or more type of Metadata Extractor fields.
- */
- static interface DirectoryHandler {
- /**
- * @param directoryType A Metadata Extractor directory class
- * @return true if the directory type is supported by this handler
- */
- boolean supports(Class<? extends Directory> directoryType);
-
- /**
- * @param directory extracted tags
- * @param metadata current tika metadata
- * @throws MetadataException typically field extraction error, aborts all further extraction
- */
- void handle(Directory directory, Metadata metadata)
- throws MetadataException;
- }
-
- /**
- * Mimics the behavior from TIKA-314 of copying all extracted tags
- * to tika metadata using field names from Metadata Extractor.
- */
- static class CopyAllFieldsHandler implements DirectoryHandler {
- public boolean supports(Class<? extends Directory> directoryType) {
- return true;
- }
-
- public void handle(Directory directory, Metadata metadata)
- throws MetadataException {
- if (directory.getTags() != null) {
- for (Tag tag : directory.getTags()) {
- metadata.set(tag.getTagName(), tag.getDescription());
- }
- }
- }
- }
-
- /**
- * Copies all fields regardless of directory, if the tag name
- * is not identical to a known Metadata field name.
- * This leads to more predictable behavior than {@link CopyAllFieldsHandler}.
- */
- static class CopyUnknownFieldsHandler implements DirectoryHandler {
- public boolean supports(Class<? extends Directory> directoryType) {
- return true;
- }
-
- public void handle(Directory directory, Metadata metadata)
- throws MetadataException {
- if (directory.getTags() != null) {
- for (Tag tag : directory.getTags()) {
- String name = tag.getTagName();
- if (!MetadataFields.isMetadataField(name) && tag.getDescription() != null) {
- String value = tag.getDescription().trim();
- if (Boolean.TRUE.toString().equalsIgnoreCase(value)) {
- value = Boolean.TRUE.toString();
- } else if (Boolean.FALSE.toString().equalsIgnoreCase(value)) {
- value = Boolean.FALSE.toString();
- }
- metadata.set(name, value);
- }
- }
- }
- }
- }
-
- /**
- * Basic image properties for TIFF and JPEG, at least.
- */
- static class DimensionsHandler implements DirectoryHandler {
- private final Pattern LEADING_NUMBERS = Pattern.compile("(\\d+)\\s*.*");
-
- public boolean supports(Class<? extends Directory> directoryType) {
- return directoryType == JpegDirectory.class ||
- directoryType == ExifSubIFDDirectory.class ||
- directoryType == ExifThumbnailDirectory.class ||
- directoryType == ExifIFD0Directory.class;
- }
-
- public void handle(Directory directory, Metadata metadata) throws MetadataException {
- // The test TIFF has width and height stored as follows according to exiv2
- //Exif.Image.ImageWidth Short 1 100
- //Exif.Image.ImageLength Short 1 75
- // and the values are found in "Thumbnail Image Width" (and Height) from Metadata Extractor
- set(directory, metadata, JpegDirectory.TAG_IMAGE_WIDTH, Metadata.IMAGE_WIDTH);
- set(directory, metadata, JpegDirectory.TAG_IMAGE_HEIGHT, Metadata.IMAGE_LENGTH);
- // Bits per sample, two methods of extracting, exif overrides jpeg
- set(directory, metadata, JpegDirectory.TAG_DATA_PRECISION, Metadata.BITS_PER_SAMPLE);
- set(directory, metadata, ExifSubIFDDirectory.TAG_BITS_PER_SAMPLE, Metadata.BITS_PER_SAMPLE);
- // Straightforward
- set(directory, metadata, ExifSubIFDDirectory.TAG_SAMPLES_PER_PIXEL, Metadata.SAMPLES_PER_PIXEL);
- }
-
- private void set(Directory directory, Metadata metadata, int extractTag, Property metadataField) {
- if (directory.containsTag(extractTag)) {
- Matcher m = LEADING_NUMBERS.matcher(directory.getString(extractTag));
- if (m.matches()) {
- metadata.set(metadataField, m.group(1));
- }
- }
- }
- }
-
- static class JpegCommentHandler implements DirectoryHandler {
- public boolean supports(Class<? extends Directory> directoryType) {
- return directoryType == JpegCommentDirectory.class;
- }
-
- public void handle(Directory directory, Metadata metadata) throws MetadataException {
- if (directory.containsTag(JpegCommentDirectory.TAG_COMMENT)) {
- metadata.add(TikaCoreProperties.COMMENTS, directory.getString(JpegCommentDirectory.TAG_COMMENT));
- }
- }
- }
-
- static class ExifHandler implements DirectoryHandler {
- // There's a new ExifHandler for each file processed, so this is thread safe
- private static final ThreadLocal<SimpleDateFormat> DATE_UNSPECIFIED_TZ = new ThreadLocal<SimpleDateFormat>() {
- @Override
- protected SimpleDateFormat initialValue() {
- return new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss", Locale.US);
- }
- };
-
- public boolean supports(Class<? extends Directory> directoryType) {
- return directoryType == ExifIFD0Directory.class ||
- directoryType == ExifSubIFDDirectory.class;
- }
-
- public void handle(Directory directory, Metadata metadata) {
- try {
- handleDateTags(directory, metadata);
- handlePhotoTags(directory, metadata);
- handleCommentTags(directory, metadata);
- } catch (MetadataException e) {
- // ignore date parse errors and proceed with other tags
- }
- }
-
- /**
- * EXIF may contain image description, although with undefined encoding.
- * Use IPTC for other annotation fields, and XMP for unicode support.
- */
- public void handleCommentTags(Directory directory, Metadata metadata) {
- if (metadata.get(TikaCoreProperties.DESCRIPTION) == null &&
- directory.containsTag(ExifIFD0Directory.TAG_IMAGE_DESCRIPTION)) {
- metadata.set(TikaCoreProperties.DESCRIPTION,
- directory.getString(ExifIFD0Directory.TAG_IMAGE_DESCRIPTION));
- }
- }
-
- /**
- * Maps common TIFF and EXIF tags onto the Tika
- * TIFF image metadata namespace.
- */
- public void handlePhotoTags(Directory directory, Metadata metadata) {
- if (directory.containsTag(ExifSubIFDDirectory.TAG_EXPOSURE_TIME)) {
- Object exposure = directory.getObject(ExifSubIFDDirectory.TAG_EXPOSURE_TIME);
- if (exposure instanceof Rational) {
- metadata.set(Metadata.EXPOSURE_TIME, ((Rational) exposure).doubleValue());
- } else {
- metadata.set(Metadata.EXPOSURE_TIME, directory.getString(ExifSubIFDDirectory.TAG_EXPOSURE_TIME));
- }
- }
-
- if (directory.containsTag(ExifSubIFDDirectory.TAG_FLASH)) {
- String flash = directory.getDescription(ExifSubIFDDirectory.TAG_FLASH);
- if (flash != null) {
- if (flash.contains("Flash fired")) {
- metadata.set(Metadata.FLASH_FIRED, Boolean.TRUE.toString());
- } else if (flash.contains("Flash did not fire")) {
- metadata.set(Metadata.FLASH_FIRED, Boolean.FALSE.toString());
- } else {
- metadata.set(Metadata.FLASH_FIRED, flash);
- }
- }
- }
-
- if (directory.containsTag(ExifSubIFDDirectory.TAG_FNUMBER)) {
- Object fnumber = directory.getObject(ExifSubIFDDirectory.TAG_FNUMBER);
- if (fnumber instanceof Rational) {
- metadata.set(Metadata.F_NUMBER, ((Rational) fnumber).doubleValue());
- } else {
- metadata.set(Metadata.F_NUMBER, directory.getString(ExifSubIFDDirectory.TAG_FNUMBER));
- }
- }
-
- if (directory.containsTag(ExifSubIFDDirectory.TAG_FOCAL_LENGTH)) {
- Object length = directory.getObject(ExifSubIFDDirectory.TAG_FOCAL_LENGTH);
- if (length instanceof Rational) {
- metadata.set(Metadata.FOCAL_LENGTH, ((Rational) length).doubleValue());
- } else {
- metadata.set(Metadata.FOCAL_LENGTH, directory.getString(ExifSubIFDDirectory.TAG_FOCAL_LENGTH));
- }
- }
-
- if (directory.containsTag(ExifSubIFDDirectory.TAG_ISO_EQUIVALENT)) {
- metadata.set(Metadata.ISO_SPEED_RATINGS, directory.getString(ExifSubIFDDirectory.TAG_ISO_EQUIVALENT));
- }
-
- if (directory.containsTag(ExifIFD0Directory.TAG_MAKE)) {
- metadata.set(Metadata.EQUIPMENT_MAKE, directory.getString(ExifIFD0Directory.TAG_MAKE));
- }
- if (directory.containsTag(ExifIFD0Directory.TAG_MODEL)) {
- metadata.set(Metadata.EQUIPMENT_MODEL, directory.getString(ExifIFD0Directory.TAG_MODEL));
- }
-
- if (directory.containsTag(ExifIFD0Directory.TAG_ORIENTATION)) {
- Object length = directory.getObject(ExifIFD0Directory.TAG_ORIENTATION);
- if (length instanceof Integer) {
- metadata.set(Metadata.ORIENTATION, Integer.toString((Integer) length));
- } else {
- metadata.set(Metadata.ORIENTATION, directory.getString(ExifIFD0Directory.TAG_ORIENTATION));
- }
- }
-
- if (directory.containsTag(ExifIFD0Directory.TAG_SOFTWARE)) {
- metadata.set(Metadata.SOFTWARE, directory.getString(ExifIFD0Directory.TAG_SOFTWARE));
- }
-
- if (directory.containsTag(ExifIFD0Directory.TAG_X_RESOLUTION)) {
- Object resolution = directory.getObject(ExifIFD0Directory.TAG_X_RESOLUTION);
- if (resolution instanceof Rational) {
- metadata.set(Metadata.RESOLUTION_HORIZONTAL, ((Rational) resolution).doubleValue());
- } else {
- metadata.set(Metadata.RESOLUTION_HORIZONTAL, directory.getString(ExifIFD0Directory.TAG_X_RESOLUTION));
- }
- }
- if (directory.containsTag(ExifIFD0Directory.TAG_Y_RESOLUTION)) {
- Object resolution = directory.getObject(ExifIFD0Directory.TAG_Y_RESOLUTION);
- if (resolution instanceof Rational) {
- metadata.set(Metadata.RESOLUTION_VERTICAL, ((Rational) resolution).doubleValue());
- } else {
- metadata.set(Metadata.RESOLUTION_VERTICAL, directory.getString(ExifIFD0Directory.TAG_Y_RESOLUTION));
- }
- }
- if (directory.containsTag(ExifIFD0Directory.TAG_RESOLUTION_UNIT)) {
- metadata.set(Metadata.RESOLUTION_UNIT, directory.getDescription(ExifIFD0Directory.TAG_RESOLUTION_UNIT));
- }
- if (directory.containsTag(ExifThumbnailDirectory.TAG_IMAGE_WIDTH)) {
- metadata.set(Metadata.IMAGE_WIDTH,
- trimPixels(directory.getDescription(ExifThumbnailDirectory.TAG_IMAGE_WIDTH)));
- }
- if (directory.containsTag(ExifThumbnailDirectory.TAG_IMAGE_HEIGHT)) {
- metadata.set(Metadata.IMAGE_LENGTH,
- trimPixels(directory.getDescription(ExifThumbnailDirectory.TAG_IMAGE_HEIGHT)));
- }
- }
-
- /**
- * Maps exif dates to metadata fields.
- */
- public void handleDateTags(Directory directory, Metadata metadata)
- throws MetadataException {
- //TODO: should we try to process ExifSubIFDDirectory.TAG_TIME_ZONE_OFFSET
- //if it exists?
- // Date/Time Original overrides value from ExifDirectory.TAG_DATETIME
- Date original = null;
- if (directory.containsTag(ExifSubIFDDirectory.TAG_DATETIME_ORIGINAL)) {
- original = directory.getDate(ExifSubIFDDirectory.TAG_DATETIME_ORIGINAL);
- // Unless we have GPS time we don't know the time zone so date must be set
- // as ISO 8601 datetime without timezone suffix (no Z or +/-)
- if (original != null) {
- String datetimeNoTimeZone = DATE_UNSPECIFIED_TZ.get().format(original); // Same time zone as Metadata Extractor uses
- metadata.set(TikaCoreProperties.CREATED, datetimeNoTimeZone);
- metadata.set(Metadata.ORIGINAL_DATE, datetimeNoTimeZone);
- }
- }
- if (directory.containsTag(ExifIFD0Directory.TAG_DATETIME)) {
- Date datetime = directory.getDate(ExifIFD0Directory.TAG_DATETIME);
- if (datetime != null) {
- String datetimeNoTimeZone = DATE_UNSPECIFIED_TZ.get().format(datetime);
- metadata.set(TikaCoreProperties.MODIFIED, datetimeNoTimeZone);
- // If Date/Time Original does not exist this might be creation date
- if (metadata.get(TikaCoreProperties.CREATED) == null) {
- metadata.set(TikaCoreProperties.CREATED, datetimeNoTimeZone);
- }
- }
- }
- }
- }
-
- /**
- * Reads image comments, originally TIKA-472.
- * Metadata Extractor does not read XMP so we need to use the values from Iptc or EXIF
- */
- static class IptcHandler implements DirectoryHandler {
- public boolean supports(Class<? extends Directory> directoryType) {
- return directoryType == IptcDirectory.class;
- }
-
- public void handle(Directory directory, Metadata metadata)
- throws MetadataException {
- if (directory.containsTag(IptcDirectory.TAG_KEYWORDS)) {
- String[] keywords = directory.getStringArray(IptcDirectory.TAG_KEYWORDS);
- for (String k : keywords) {
- metadata.add(TikaCoreProperties.KEYWORDS, k);
- }
- }
- if (directory.containsTag(IptcDirectory.TAG_HEADLINE)) {
- metadata.set(TikaCoreProperties.TITLE, directory.getString(IptcDirectory.TAG_HEADLINE));
- } else if (directory.containsTag(IptcDirectory.TAG_OBJECT_NAME)) {
- metadata.set(TikaCoreProperties.TITLE, directory.getString(IptcDirectory.TAG_OBJECT_NAME));
- }
- if (directory.containsTag(IptcDirectory.TAG_BY_LINE)) {
- metadata.set(TikaCoreProperties.CREATOR, directory.getString(IptcDirectory.TAG_BY_LINE));
- metadata.set(IPTC.CREATOR, directory.getString(IptcDirectory.TAG_BY_LINE));
- }
- if (directory.containsTag(IptcDirectory.TAG_CAPTION)) {
- metadata.set(TikaCoreProperties.DESCRIPTION,
- // Looks like metadata extractor returns IPTC newlines as a single carriage return,
- // but the exiv2 command does not so we change to line feed here because that is less surprising to users
- directory.getString(IptcDirectory.TAG_CAPTION).replaceAll("\r\n?", "\n"));
- }
- }
- }
-
- /**
- * Maps EXIF Geo Tags onto the Tika Geo metadata namespace.
- */
- static class GeotagHandler implements DirectoryHandler {
- public boolean supports(Class<? extends Directory> directoryType) {
- return directoryType == GpsDirectory.class;
- }
-
- public void handle(Directory directory, Metadata metadata) throws MetadataException {
- GeoLocation geoLocation = ((GpsDirectory) directory).getGeoLocation();
- if (geoLocation != null) {
- DecimalFormat geoDecimalFormat = new DecimalFormat(GEO_DECIMAL_FORMAT_STRING,
- new DecimalFormatSymbols(Locale.ENGLISH));
- metadata.set(TikaCoreProperties.LATITUDE, geoDecimalFormat.format(geoLocation.getLatitude()));
- metadata.set(TikaCoreProperties.LONGITUDE, geoDecimalFormat.format(geoLocation.getLongitude()));
- }
- }
- }
-
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.image;
+
+import java.io.ByteArrayInputStream;
+import java.io.File;
+import java.io.IOException;
+import java.io.InputStream;
+import java.text.DecimalFormat;
+import java.text.DecimalFormatSymbols;
+import java.text.SimpleDateFormat;
+import java.util.Date;
+import java.util.Iterator;
+import java.util.Locale;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import com.drew.imaging.jpeg.JpegMetadataReader;
+import com.drew.imaging.jpeg.JpegProcessingException;
+import com.drew.imaging.riff.RiffProcessingException;
+import com.drew.imaging.tiff.TiffMetadataReader;
+import com.drew.imaging.tiff.TiffProcessingException;
+import com.drew.imaging.webp.WebpMetadataReader;
+import com.drew.lang.ByteArrayReader;
+import com.drew.lang.GeoLocation;
+import com.drew.lang.Rational;
+import com.drew.metadata.Directory;
+import com.drew.metadata.MetadataException;
+import com.drew.metadata.Tag;
+import com.drew.metadata.exif.ExifIFD0Directory;
+import com.drew.metadata.exif.ExifReader;
+import com.drew.metadata.exif.ExifSubIFDDirectory;
+import com.drew.metadata.exif.ExifThumbnailDirectory;
+import com.drew.metadata.exif.GpsDirectory;
+import com.drew.metadata.iptc.IptcDirectory;
+import com.drew.metadata.jpeg.JpegCommentDirectory;
+import com.drew.metadata.jpeg.JpegDirectory;
+import org.apache.commons.io.IOUtils;
+import org.apache.jempbox.xmp.XMPMetadata;
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.metadata.IPTC;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.Property;
+import org.apache.tika.metadata.TikaCoreProperties;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.parser.xmp.JempboxExtractor;
+import org.w3c.dom.Document;
+import org.xml.sax.SAXException;
+
+/**
+ * Uses the <a href="http://www.drewnoakes.com/code/exif/">Metadata Extractor</a> library
+ * to read EXIF and IPTC image metadata and map to Tika fields.
+ * <p/>
+ * As of 2.4.0 the library supports jpeg and tiff.
+ * As of 2.8.0 the library supports webp.
+ */
+public class ImageMetadataExtractor {
+
+ private static final String GEO_DECIMAL_FORMAT_STRING = "#.######"; // 6 dp seems to be reasonable
+ private final Metadata metadata;
+ private DirectoryHandler[] handlers;
+
+ /**
+ * Creates an extractor that uses the default directory handlers, registered
+ * in this order: CopyUnknownFieldsHandler, JpegCommentHandler, ExifHandler,
+ * DimensionsHandler, GeotagHandler and IptcHandler.
+ *
+ * @param metadata to extract to, using default directory handlers
+ */
+ public ImageMetadataExtractor(Metadata metadata) {
+ this(metadata,
+ new CopyUnknownFieldsHandler(),
+ new JpegCommentHandler(),
+ new ExifHandler(),
+ new DimensionsHandler(),
+ new GeotagHandler(),
+ new IptcHandler()
+ );
+ }
+
+ /**
+ * Creates an extractor that uses the given directory handlers. The handler
+ * array is stored as passed in, without copying.
+ *
+ * @param metadata to extract to
+ * @param handlers handlers in order, note that handlers may override values from earlier handlers
+ */
+ public ImageMetadataExtractor(Metadata metadata, DirectoryHandler... handlers) {
+ this.metadata = metadata;
+ this.handlers = handlers;
+ }
+
+ /**
+ * Strips the unit suffix from a dimension description such as "100 pixels".
+ *
+ * @param s description to trim, may be {@code null}
+ * @return the text before the last occurrence of " pixels"; {@code s} itself
+ * (including {@code null}) when it does not contain " pixels"
+ */
+ private static String trimPixels(String s) {
+ if (s == null) {
+ return null;
+ }
+ int i = s.lastIndexOf(" pixels");
+ // lastIndexOf returns -1 when the suffix is absent, and substring(0, -1)
+ // would throw StringIndexOutOfBoundsException, so leave such values as-is
+ return i < 0 ? s : s.substring(0, i);
+ }
+
+ /**
+ * Reads the metadata of a JPEG file with {@link JpegMetadataReader} and
+ * passes it to the registered handlers.
+ *
+ * @param file JPEG file to read
+ * @throws TikaException wrapping any JpegProcessingException or
+ * MetadataException raised while reading or handling the metadata
+ */
+ public void parseJpeg(File file)
+ throws IOException, SAXException, TikaException {
+ try {
+ handle(JpegMetadataReader.readMetadata(file));
+ } catch (JpegProcessingException | MetadataException e) {
+ throw new TikaException("Can't read JPEG metadata", e);
+ }
+ }
+
+ /**
+ * Reads the metadata of a TIFF file with {@link TiffMetadataReader} and
+ * passes it to the registered handlers.
+ *
+ * @param file TIFF file to read
+ * @throws TikaException wrapping any MetadataException or
+ * TiffProcessingException raised while reading or handling the metadata
+ */
+ public void parseTiff(File file)
+ throws IOException, SAXException, TikaException {
+ try {
+ handle(TiffMetadataReader.readMetadata(file));
+ } catch (MetadataException | TiffProcessingException e) {
+ throw new TikaException("Can't read TIFF metadata", e);
+ }
+ }
+
+ /**
+ * Reads the metadata of a WebP file with {@link WebpMetadataReader} and
+ * passes it to the registered handlers.
+ *
+ * @param file WebP file to read
+ * @throws IOException propagated unchanged from the reader
+ * @throws TikaException wrapping any RiffProcessingException or
+ * MetadataException raised while reading or handling the metadata
+ */
+ public void parseWebP(File file) throws IOException, TikaException {
+ try {
+ // The reader returns its own Metadata instance, so none is allocated here
+ com.drew.metadata.Metadata webPMetadata = WebpMetadataReader.readMetadata(file);
+ handle(webPMetadata);
+ } catch (RiffProcessingException | MetadataException e) {
+ throw new TikaException("Can't process Riff data", e);
+ }
+ }
+
+ /**
+ * Reads {@code length} bytes of EXIF data from the stream, optionally
+ * prefixing them with a 6-byte "Exif" header, and parses the result.
+ *
+ * @param stream source of the raw EXIF bytes
+ * @param length number of bytes to read from the stream
+ * @param needsExifHeader whether the header has to be prepended to the data
+ */
+ public void parseRawExif(InputStream stream, int length, boolean needsExifHeader)
+ throws IOException, SAXException, TikaException {
+ // The header is the four letters "Exif"; bytes 4 and 5 keep their default of zero
+ final int offset = needsExifHeader ? 6 : 0;
+ final byte[] buffer = new byte[length + offset];
+ if (needsExifHeader) {
+ buffer[0] = (byte) 'E';
+ buffer[1] = (byte) 'x';
+ buffer[2] = (byte) 'i';
+ buffer[3] = (byte) 'f';
+ }
+ IOUtils.readFully(stream, buffer, offset, length);
+ parseRawExif(buffer);
+ }
+
+ /**
+ * Extracts EXIF directories from a raw byte array and passes them to the
+ * registered handlers. Extraction starts at the offset
+ * {@code ExifReader.JPEG_SEGMENT_PREAMBLE.length()}.
+ *
+ * @param exifData raw EXIF bytes
+ * @throws TikaException wrapping any MetadataException raised by a handler
+ */
+ public void parseRawExif(byte[] exifData)
+ throws IOException, SAXException, TikaException {
+ // Named so that it does not shadow the Tika metadata field of this class
+ final com.drew.metadata.Metadata exifMetadata = new com.drew.metadata.Metadata();
+ final int preambleLength = ExifReader.JPEG_SEGMENT_PREAMBLE.length();
+ new ExifReader().extract(new ByteArrayReader(exifData), exifMetadata, preambleLength);
+ try {
+ handle(exifMetadata);
+ } catch (MetadataException e) {
+ throw new TikaException("Can't process the EXIF Data", e);
+ }
+ }
+
+ /**
+ * Parses raw XMP bytes as XML and copies the Dublin Core and XMP Media
+ * Management values into the Tika metadata. Data that fails to parse is
+ * skipped without raising an error.
+ *
+ * @param xmpData raw XMP packet bytes
+ */
+ public void parseRawXMP(byte[] xmpData)
+ throws IOException, SAXException, TikaException {
+ XMPMetadata parsed = null;
+ try (InputStream xmpStream = new ByteArrayInputStream(xmpData)) {
+ Document document = new ParseContext().getDocumentBuilder().parse(xmpStream);
+ if (document != null) {
+ parsed = new XMPMetadata(document);
+ }
+ } catch (IOException | SAXException ignored) {
+ // Best effort: unreadable XMP leaves the metadata untouched
+ }
+ if (parsed == null) {
+ return;
+ }
+ JempboxExtractor.extractDublinCore(parsed, metadata);
+ JempboxExtractor.extractXMPMM(parsed, metadata);
+ }
+
+ /**
+ * Copies extracted tags to tika metadata using registered handlers.
+ * Delegates to {@link #handle(Iterator)} with the directories of the
+ * given Metadata Extractor result.
+ *
+ * @param metadataExtractor Tag directories from a Metadata Extractor "reader"
+ * @throws MetadataException This method does not handle exceptions from Metadata Extractor
+ */
+ protected void handle(com.drew.metadata.Metadata metadataExtractor)
+ throws MetadataException {
+ handle(metadataExtractor.getDirectories().iterator());
+ }
+
+ /**
+ * Copies extracted tags to tika metadata using registered handlers.
+ *
+ * @param directories Metadata Extractor {@link com.drew.metadata.Directory} instances.
+ * @throws MetadataException This method does not handle exceptions from Metadata Extractor
+ */
+ protected void handle(Iterator<Directory> directories) throws MetadataException {
+ while (directories.hasNext()) {
+ Directory directory = directories.next();
+ for (DirectoryHandler handler : handlers) {
+ if (handler.supports(directory.getClass())) {
+ handler.handle(directory, metadata);
+ }
+ }
+ }
+ }
+
+ /**
+ * Reads one or more types of Metadata Extractor fields and maps them to
+ * tika metadata.
+ */
+ static interface DirectoryHandler {
+ /**
+ * @param directoryType A Metadata Extractor directory class
+ * @return true if the directory type is supported by this handler
+ */
+ boolean supports(Class<? extends Directory> directoryType);
+
+ /**
+ * @param directory extracted tags
+ * @param metadata current tika metadata
+ * @throws MetadataException typically field extraction error, aborts all further extraction
+ */
+ void handle(Directory directory, Metadata metadata)
+ throws MetadataException;
+ }
+
+ /**
+ * Handler that accepts every directory and copies each of its tags to tika
+ * metadata under the tag name reported by Metadata Extractor, mimicking
+ * the behavior from TIKA-314.
+ */
+ static class CopyAllFieldsHandler implements DirectoryHandler {
+ @Override
+ public boolean supports(Class<? extends Directory> directoryType) {
+ return true;
+ }
+
+ @Override
+ public void handle(Directory directory, Metadata metadata)
+ throws MetadataException {
+ if (directory.getTags() == null) {
+ return;
+ }
+ for (Tag current : directory.getTags()) {
+ metadata.set(current.getTagName(), current.getDescription());
+ }
+ }
+ }
+
+ /**
+ * Handler that accepts every directory and copies each tag whose name is
+ * not a known Metadata field name and whose description is not null.
+ * Values are trimmed, and any casing of "true"/"false" is normalised to
+ * lower case. This leads to more predictable behavior than
+ * {@link CopyAllFieldsHandler}.
+ */
+ static class CopyUnknownFieldsHandler implements DirectoryHandler {
+ @Override
+ public boolean supports(Class<? extends Directory> directoryType) {
+ return true;
+ }
+
+ @Override
+ public void handle(Directory directory, Metadata metadata)
+ throws MetadataException {
+ if (directory.getTags() == null) {
+ return;
+ }
+ for (Tag current : directory.getTags()) {
+ final String fieldName = current.getTagName();
+ // Skip names already claimed by Metadata and tags without a description
+ if (MetadataFields.isMetadataField(fieldName) || current.getDescription() == null) {
+ continue;
+ }
+ String text = current.getDescription().trim();
+ if (Boolean.TRUE.toString().equalsIgnoreCase(text)) {
+ text = Boolean.TRUE.toString();
+ } else if (Boolean.FALSE.toString().equalsIgnoreCase(text)) {
+ text = Boolean.FALSE.toString();
+ }
+ metadata.set(fieldName, text);
+ }
+ }
+ }
+
+ /**
+ * Basic image properties for TIFF and JPEG, at least. Copies width, height,
+ * bits per sample and samples per pixel, keeping only the leading digits
+ * of each value.
+ */
+ static class DimensionsHandler implements DirectoryHandler {
+ // Captures the leading run of digits of a value.
+ // Compiled once for the class rather than once per handler instance.
+ private static final Pattern LEADING_NUMBERS = Pattern.compile("(\\d+)\\s*.*");
+
+ public boolean supports(Class<? extends Directory> directoryType) {
+ return directoryType == JpegDirectory.class ||
+ directoryType == ExifSubIFDDirectory.class ||
+ directoryType == ExifThumbnailDirectory.class ||
+ directoryType == ExifIFD0Directory.class;
+ }
+
+ public void handle(Directory directory, Metadata metadata) throws MetadataException {
+ // The test TIFF has width and height stored as follows according to exiv2
+ //Exif.Image.ImageWidth Short 1 100
+ //Exif.Image.ImageLength Short 1 75
+ // and the values are found in "Thumbnail Image Width" (and Height) from Metadata Extractor
+ set(directory, metadata, JpegDirectory.TAG_IMAGE_WIDTH, Metadata.IMAGE_WIDTH);
+ set(directory, metadata, JpegDirectory.TAG_IMAGE_HEIGHT, Metadata.IMAGE_LENGTH);
+ // Bits per sample, two methods of extracting, exif overrides jpeg
+ set(directory, metadata, JpegDirectory.TAG_DATA_PRECISION, Metadata.BITS_PER_SAMPLE);
+ set(directory, metadata, ExifSubIFDDirectory.TAG_BITS_PER_SAMPLE, Metadata.BITS_PER_SAMPLE);
+ // Straightforward
+ set(directory, metadata, ExifSubIFDDirectory.TAG_SAMPLES_PER_PIXEL, Metadata.SAMPLES_PER_PIXEL);
+ }
+
+ /**
+ * Copies the leading digits of one tag value into a metadata field.
+ * Nothing is set when the tag is absent, has no string form, or does
+ * not start with a digit.
+ */
+ private void set(Directory directory, Metadata metadata, int extractTag, Property metadataField) {
+ if (!directory.containsTag(extractTag)) {
+ return;
+ }
+ String raw = directory.getString(extractTag);
+ if (raw == null) {
+ // Pattern.matcher(null) throws NullPointerException; skip such values
+ return;
+ }
+ Matcher m = LEADING_NUMBERS.matcher(raw);
+ if (m.matches()) {
+ metadata.set(metadataField, m.group(1));
+ }
+ }
+ }
+
+ static class JpegCommentHandler implements DirectoryHandler {
+ public boolean supports(Class<? extends Directory> directoryType) {
+ return directoryType == JpegCommentDirectory.class;
+ }
+
+ public void handle(Directory directory, Metadata metadata) throws MetadataException {
+ if (directory.containsTag(JpegCommentDirectory.TAG_COMMENT)) {
+ metadata.add(TikaCoreProperties.COMMENTS, directory.getString(JpegCommentDirectory.TAG_COMMENT));
+ }
+ }
+ }
+
+ static class ExifHandler implements DirectoryHandler {
+ // There's a new ExifHandler for each file processed, so this is thread safe
+ private static final ThreadLocal<SimpleDateFormat> DATE_UNSPECIFIED_TZ = new ThreadLocal<SimpleDateFormat>() {
+ @Override
+ protected SimpleDateFormat initialValue() {
+ return new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss", Locale.US);
+ }
+ };
+
+ public boolean supports(Class<? extends Directory> directoryType) {
+ return directoryType == ExifIFD0Directory.class ||
+ directoryType == ExifSubIFDDirectory.class;
+ }
+
+ public void handle(Directory directory, Metadata metadata) {
+ try {
+ handleDateTags(directory, metadata);
+ handlePhotoTags(directory, metadata);
+ handleCommentTags(directory, metadata);
+ } catch (MetadataException e) {
+ // ignore date parse errors and proceed with other tags
+ }
+ }
+
+ /**
+ * EXIF may contain image description, although with undefined encoding.
+ * Use IPTC for other annotation fields, and XMP for unicode support.
+ */
+ public void handleCommentTags(Directory directory, Metadata metadata) {
+ if (metadata.get(TikaCoreProperties.DESCRIPTION) == null &&
+ directory.containsTag(ExifIFD0Directory.TAG_IMAGE_DESCRIPTION)) {
+ metadata.set(TikaCoreProperties.DESCRIPTION,
+ directory.getString(ExifIFD0Directory.TAG_IMAGE_DESCRIPTION));
+ }
+ }
+
+ /**
+ * Maps common TIFF and EXIF tags onto the Tika
+ * TIFF image metadata namespace.
+ */
+ public void handlePhotoTags(Directory directory, Metadata metadata) {
+ if (directory.containsTag(ExifSubIFDDirectory.TAG_EXPOSURE_TIME)) {
+ Object exposure = directory.getObject(ExifSubIFDDirectory.TAG_EXPOSURE_TIME);
+ if (exposure instanceof Rational) {
+ metadata.set(Metadata.EXPOSURE_TIME, ((Rational) exposure).doubleValue());
+ } else {
+ metadata.set(Metadata.EXPOSURE_TIME, directory.getString(ExifSubIFDDirectory.TAG_EXPOSURE_TIME));
+ }
+ }
+
+ if (directory.containsTag(ExifSubIFDDirectory.TAG_FLASH)) {
+ String flash = directory.getDescription(ExifSubIFDDirectory.TAG_FLASH);
+ if (flash != null) {
+ if (flash.contains("Flash fired")) {
+ metadata.set(Metadata.FLASH_FIRED, Boolean.TRUE.toString());
+ } else if (flash.contains("Flash did not fire")) {
+ metadata.set(Metadata.FLASH_FIRED, Boolean.FALSE.toString());
+ } else {
+ metadata.set(Metadata.FLASH_FIRED, flash);
+ }
+ }
+ }
+
+ if (directory.containsTag(ExifSubIFDDirectory.TAG_FNUMBER)) {
+ Object fnumber = directory.getObject(ExifSubIFDDirectory.TAG_FNUMBER);
+ if (fnumber instanceof Rational) {
+ metadata.set(Metadata.F_NUMBER, ((Rational) fnumber).doubleValue());
+ } else {
+ metadata.set(Metadata.F_NUMBER, directory.getString(ExifSubIFDDirectory.TAG_FNUMBER));
+ }
+ }
+
+ if (directory.containsTag(ExifSubIFDDirectory.TAG_FOCAL_LENGTH)) {
+ Object length = directory.getObject(ExifSubIFDDirectory.TAG_FOCAL_LENGTH);
+ if (length instanceof Rational) {
+ metadata.set(Metadata.FOCAL_LENGTH, ((Rational) length).doubleValue());
+ } else {
+ metadata.set(Metadata.FOCAL_LENGTH, directory.getString(ExifSubIFDDirectory.TAG_FOCAL_LENGTH));
+ }
+ }
+
+ if (directory.containsTag(ExifSubIFDDirectory.TAG_ISO_EQUIVALENT)) {
+ metadata.set(Metadata.ISO_SPEED_RATINGS, directory.getString(ExifSubIFDDirectory.TAG_ISO_EQUIVALENT));
+ }
+
+ if (directory.containsTag(ExifIFD0Directory.TAG_MAKE)) {
+ metadata.set(Metadata.EQUIPMENT_MAKE, directory.getString(ExifIFD0Directory.TAG_MAKE));
+ }
+ if (directory.containsTag(ExifIFD0Directory.TAG_MODEL)) {
+ metadata.set(Metadata.EQUIPMENT_MODEL, directory.getString(ExifIFD0Directory.TAG_MODEL));
+ }
+
+ if (directory.containsTag(ExifIFD0Directory.TAG_ORIENTATION)) {
+ Object length = directory.getObject(ExifIFD0Directory.TAG_ORIENTATION);
+ if (length instanceof Integer) {
+ metadata.set(Metadata.ORIENTATION, Integer.toString((Integer) length));
+ } else {
+ metadata.set(Metadata.ORIENTATION, directory.getString(ExifIFD0Directory.TAG_ORIENTATION));
+ }
+ }
+
+ if (directory.containsTag(ExifIFD0Directory.TAG_SOFTWARE)) {
+ metadata.set(Metadata.SOFTWARE, directory.getString(ExifIFD0Directory.TAG_SOFTWARE));
+ }
+
+ if (directory.containsTag(ExifIFD0Directory.TAG_X_RESOLUTION)) {
+ Object resolution = directory.getObject(ExifIFD0Directory.TAG_X_RESOLUTION);
+ if (resolution instanceof Rational) {
+ metadata.set(Metadata.RESOLUTION_HORIZONTAL, ((Rational) resolution).doubleValue());
+ } else {
+ metadata.set(Metadata.RESOLUTION_HORIZONTAL, directory.getString(ExifIFD0Directory.TAG_X_RESOLUTION));
+ }
+ }
+ if (directory.containsTag(ExifIFD0Directory.TAG_Y_RESOLUTION)) {
+ Object resolution = directory.getObject(ExifIFD0Directory.TAG_Y_RESOLUTION);
+ if (resolution instanceof Rational) {
+ metadata.set(Metadata.RESOLUTION_VERTICAL, ((Rational) resolution).doubleValue());
+ } else {
+ metadata.set(Metadata.RESOLUTION_VERTICAL, directory.getString(ExifIFD0Directory.TAG_Y_RESOLUTION));
+ }
+ }
+ if (directory.containsTag(ExifIFD0Directory.TAG_RESOLUTION_UNIT)) {
+ metadata.set(Metadata.RESOLUTION_UNIT, directory.getDescription(ExifIFD0Directory.TAG_RESOLUTION_UNIT));
+ }
+ if (directory.containsTag(ExifThumbnailDirectory.TAG_IMAGE_WIDTH)) {
+ metadata.set(Metadata.IMAGE_WIDTH,
+ trimPixels(directory.getDescription(ExifThumbnailDirectory.TAG_IMAGE_WIDTH)));
+ }
+ if (directory.containsTag(ExifThumbnailDirectory.TAG_IMAGE_HEIGHT)) {
+ metadata.set(Metadata.IMAGE_LENGTH,
+ trimPixels(directory.getDescription(ExifThumbnailDirectory.TAG_IMAGE_HEIGHT)));
+ }
+ }
+
+ /**
+ * Maps exif dates to metadata fields.
+ */
+ public void handleDateTags(Directory directory, Metadata metadata)
+ throws MetadataException {
+ //TODO: should we try to process ExifSubIFDDirectory.TAG_TIME_ZONE_OFFSET
+ //if it exists?
+ // Date/Time Original overrides value from ExifDirectory.TAG_DATETIME
+ Date original = null;
+ if (directory.containsTag(ExifSubIFDDirectory.TAG_DATETIME_ORIGINAL)) {
+ original = directory.getDate(ExifSubIFDDirectory.TAG_DATETIME_ORIGINAL);
+ // Unless we have GPS time we don't know the time zone so date must be set
+ // as ISO 8601 datetime without timezone suffix (no Z or +/-)
+ if (original != null) {
+ String datetimeNoTimeZone = DATE_UNSPECIFIED_TZ.get().format(original); // Same time zone as Metadata Extractor uses
+ metadata.set(TikaCoreProperties.CREATED, datetimeNoTimeZone);
+ metadata.set(Metadata.ORIGINAL_DATE, datetimeNoTimeZone);
+ }
+ }
+ if (directory.containsTag(ExifIFD0Directory.TAG_DATETIME)) {
+ Date datetime = directory.getDate(ExifIFD0Directory.TAG_DATETIME);
+ if (datetime != null) {
+ String datetimeNoTimeZone = DATE_UNSPECIFIED_TZ.get().format(datetime);
+ metadata.set(TikaCoreProperties.MODIFIED, datetimeNoTimeZone);
+ // If Date/Time Original does not exist this might be creation date
+ if (metadata.get(TikaCoreProperties.CREATED) == null) {
+ metadata.set(TikaCoreProperties.CREATED, datetimeNoTimeZone);
+ }
+ }
+ }
+ }
+ }
+
+ /**
+  * Reads image comments, originally TIKA-472.
+  * Metadata Extractor does not read XMP so we need to use the values from Iptc or EXIF
+  * <p>
+  * Keywords, title (headline, else object name), creator (by-line) and
+  * description (caption) are copied from the IPTC directory.
+  */
+ static class IptcHandler implements DirectoryHandler {
+     /**
+      * @return {@code true} only for {@code IptcDirectory}
+      */
+     public boolean supports(Class<? extends Directory> directoryType) {
+         return directoryType == IptcDirectory.class;
+     }
+
+     /**
+      * Copies the IPTC tags that are present into the metadata.
+      * Tags whose value cannot be read as a string (array) are skipped.
+      *
+      * @param directory directory to read from
+      * @param metadata  metadata object receiving the values
+      * @throws MetadataException declared in the signature; nothing in this
+      *                           body throws it explicitly
+      */
+     public void handle(Directory directory, Metadata metadata)
+             throws MetadataException {
+         if (directory.containsTag(IptcDirectory.TAG_KEYWORDS)) {
+             String[] keywords = directory.getStringArray(IptcDirectory.TAG_KEYWORDS);
+             // Metadata Extractor declares this result nullable, so guard the loop
+             if (keywords != null) {
+                 for (String keyword : keywords) {
+                     metadata.add(TikaCoreProperties.KEYWORDS, keyword);
+                 }
+             }
+         }
+         // The headline takes precedence over the object name as title
+         if (directory.containsTag(IptcDirectory.TAG_HEADLINE)) {
+             metadata.set(TikaCoreProperties.TITLE, directory.getString(IptcDirectory.TAG_HEADLINE));
+         } else if (directory.containsTag(IptcDirectory.TAG_OBJECT_NAME)) {
+             metadata.set(TikaCoreProperties.TITLE, directory.getString(IptcDirectory.TAG_OBJECT_NAME));
+         }
+         if (directory.containsTag(IptcDirectory.TAG_BY_LINE)) {
+             String byLine = directory.getString(IptcDirectory.TAG_BY_LINE);
+             metadata.set(TikaCoreProperties.CREATOR, byLine);
+             metadata.set(IPTC.CREATOR, byLine);
+         }
+         if (directory.containsTag(IptcDirectory.TAG_CAPTION)) {
+             String caption = directory.getString(IptcDirectory.TAG_CAPTION);
+             if (caption != null) {
+                 // Looks like metadata extractor returns IPTC newlines as a single carriage return,
+                 // but the exiv2 command does not so we change to line feed here because that is less surprising to users
+                 metadata.set(TikaCoreProperties.DESCRIPTION, caption.replaceAll("\r\n?", "\n"));
+             }
+         }
+     }
+ }
+
+ /**
+  * Translates the location held by a GPS directory into the Tika
+  * latitude and longitude properties.
+  */
+ static class GeotagHandler implements DirectoryHandler {
+     /**
+      * @return {@code true} only for {@code GpsDirectory}
+      */
+     public boolean supports(Class<? extends Directory> directoryType) {
+         return GpsDirectory.class == directoryType;
+     }
+
+     /**
+      * Writes latitude and longitude, formatted with English decimal symbols,
+      * when the directory yields a location; otherwise does nothing.
+      */
+     public void handle(Directory directory, Metadata metadata) throws MetadataException {
+         GpsDirectory gps = (GpsDirectory) directory;
+         GeoLocation location = gps.getGeoLocation();
+         if (location == null) {
+             return;
+         }
+         DecimalFormatSymbols symbols = new DecimalFormatSymbols(Locale.ENGLISH);
+         DecimalFormat formatter = new DecimalFormat(GEO_DECIMAL_FORMAT_STRING, symbols);
+         metadata.set(TikaCoreProperties.LATITUDE, formatter.format(location.getLatitude()));
+         metadata.set(TikaCoreProperties.LONGITUDE, formatter.format(location.getLongitude()));
+     }
+ }
+
+}