You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by bo...@apache.org on 2015/12/29 00:22:47 UTC

svn commit: r1722029 [3/4] - in /tika/branches/2.x: tika-parser-modules/ tika-parser-modules/tika-multimedia-module/ tika-parser-modules/tika-multimedia-module/src/main/java/org/apache/tika/module/ tika-parser-modules/tika-multimedia-module/src/main/ja...

Added: tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/main/java/org/apache/tika/parser/mp3/Mp3Parser.java
URL: http://svn.apache.org/viewvc/tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/main/java/org/apache/tika/parser/mp3/Mp3Parser.java?rev=1722029&view=auto
==============================================================================
--- tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/main/java/org/apache/tika/parser/mp3/Mp3Parser.java (added)
+++ tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/main/java/org/apache/tika/parser/mp3/Mp3Parser.java Mon Dec 28 23:22:46 2015
@@ -0,0 +1,246 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.mp3;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Set;
+
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.io.TailStream;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.TikaCoreProperties;
+import org.apache.tika.metadata.XMPDM;
+import org.apache.tika.mime.MediaType;
+import org.apache.tika.parser.AbstractParser;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.parser.mp3.ID3Tags.ID3Comment;
+import org.apache.tika.sax.XHTMLContentHandler;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.SAXException;
+
+/**
+ * The <code>Mp3Parser</code> is used to parse ID3 Version 1 Tag information
+ * from an MP3 file, if available.
+ *
+ * @see <a href="http://www.id3.org/ID3v1">MP3 ID3 Version 1 specification</a>
+ * @see <a href="http://www.id3.org/id3v2.4.0-structure">MP3 ID3 Version 2.4 Structure Specification</a>
+ * @see <a href="http://www.id3.org/id3v2.4.0-frames">MP3 ID3 Version 2.4 Frames Specification</a>
+ */
+public class Mp3Parser extends AbstractParser {
+
+    /** Serial version UID */
+    private static final long serialVersionUID = 8537074922934844370L;
+
+    private static final Set<MediaType> SUPPORTED_TYPES =
+        Collections.singleton(MediaType.audio("mpeg"));
+
+    public Set<MediaType> getSupportedTypes(ParseContext context) {
+        return SUPPORTED_TYPES;
+    }
+
+
+    public void parse(
+            InputStream stream, ContentHandler handler,
+            Metadata metadata, ParseContext context)
+            throws IOException, SAXException, TikaException {
+        metadata.set(Metadata.CONTENT_TYPE, "audio/mpeg");
+        metadata.set(XMPDM.AUDIO_COMPRESSOR, "MP3");
+
+        XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
+        xhtml.startDocument();
+
+        // Create handlers for the various kinds of ID3 tags
+        ID3TagsAndAudio audioAndTags = getAllTagHandlers(stream, handler);
+
+        // Process tags metadata if the file has supported tags
+        if (audioAndTags.tags.length > 0) {
+           CompositeTagHandler tag = new CompositeTagHandler(audioAndTags.tags);
+
+           metadata.set(TikaCoreProperties.TITLE, tag.getTitle());
+           metadata.set(TikaCoreProperties.CREATOR, tag.getArtist());
+           metadata.set(XMPDM.ARTIST, tag.getArtist());
+           metadata.set(XMPDM.ALBUM_ARTIST, tag.getAlbumArtist());
+           metadata.set(XMPDM.COMPOSER, tag.getComposer());
+           metadata.set(XMPDM.ALBUM, tag.getAlbum());
+           metadata.set(XMPDM.COMPILATION, tag.getCompilation());
+           metadata.set(XMPDM.RELEASE_DATE, tag.getYear());
+           metadata.set(XMPDM.GENRE, tag.getGenre());
+
+           List<String> comments = new ArrayList<String>();
+           for (ID3Comment comment : tag.getComments()) {
+              StringBuffer cmt = new StringBuffer();
+              if (comment.getLanguage() != null) {
+                 cmt.append(comment.getLanguage());
+                 cmt.append(" - ");
+              }
+              if (comment.getDescription() != null) {
+                 cmt.append(comment.getDescription());
+                 if (comment.getText() != null) {
+                    cmt.append("\n");
+                 }
+              }
+              if (comment.getText() != null) {
+                 cmt.append(comment.getText());
+              }
+              
+              comments.add(cmt.toString());
+              metadata.add(XMPDM.LOG_COMMENT.getName(), cmt.toString());
+           }
+
+           xhtml.element("h1", tag.getTitle());
+           xhtml.element("p", tag.getArtist());
+
+            // ID3v1.1 Track addition
+            StringBuilder sb = new StringBuilder();
+            sb.append(tag.getAlbum());
+            if (tag.getTrackNumber() != null) {
+                sb.append(", track ").append(tag.getTrackNumber());
+                metadata.set(XMPDM.TRACK_NUMBER, tag.getTrackNumber());
+            }
+            if (tag.getDisc() != null) {
+                sb.append(", disc ").append(tag.getDisc());
+                metadata.set(XMPDM.DISC_NUMBER, tag.getDisc());
+            }
+            xhtml.element("p", sb.toString());
+            
+            xhtml.element("p", tag.getYear());
+            xhtml.element("p", tag.getGenre());
+            xhtml.element("p", String.valueOf(audioAndTags.duration));
+            for (String comment : comments) {
+               xhtml.element("p", comment);
+            }
+        }
+        if (audioAndTags.duration > 0) {
+            metadata.set(XMPDM.DURATION, audioAndTags.duration);
+        }
+        if (audioAndTags.audio != null) {
+            metadata.set("samplerate", String.valueOf(audioAndTags.audio.getSampleRate()));
+            metadata.set("channels", String.valueOf(audioAndTags.audio.getChannels()));
+            metadata.set("version", audioAndTags.audio.getVersion());
+            
+            metadata.set(
+                    XMPDM.AUDIO_SAMPLE_RATE,
+                    Integer.toString(audioAndTags.audio.getSampleRate()));
+            if(audioAndTags.audio.getChannels() == 1) {
+               metadata.set(XMPDM.AUDIO_CHANNEL_TYPE, "Mono");
+            } else if(audioAndTags.audio.getChannels() == 2) {
+               metadata.set(XMPDM.AUDIO_CHANNEL_TYPE, "Stereo");
+            } else if(audioAndTags.audio.getChannels() == 5) {
+               metadata.set(XMPDM.AUDIO_CHANNEL_TYPE, "5.1");
+            } else if(audioAndTags.audio.getChannels() == 7) {
+               metadata.set(XMPDM.AUDIO_CHANNEL_TYPE, "7.1");
+            }
+        }
+        if (audioAndTags.lyrics != null && audioAndTags.lyrics.hasLyrics()) {
+           xhtml.startElement("p", "class", "lyrics");
+           xhtml.characters(audioAndTags.lyrics.lyricsText);
+           xhtml.endElement("p");
+        }
+
+        xhtml.endDocument();
+    }
+
+    /**
+     * Scans the MP3 frames for ID3 tags, and creates ID3Tag Handlers
+     *  for each supported set of tags. 
+     */
+    protected static ID3TagsAndAudio getAllTagHandlers(InputStream stream, ContentHandler handler)
+           throws IOException, SAXException, TikaException {
+       ID3v24Handler v24 = null;
+       ID3v23Handler v23 = null;
+       ID3v22Handler v22 = null;
+       ID3v1Handler v1 = null;
+       LyricsHandler lyrics = null;
+       AudioFrame firstAudio = null;
+
+       TailStream tailStream = new TailStream(stream, 10240+128);
+       MpegStream mpegStream = new MpegStream(tailStream);
+
+       // ID3v2 tags live at the start of the file
+       // You can apparently have several different ID3 tag blocks
+       // So, keep going until we don't find any more
+       MP3Frame f;
+       while ((f = ID3v2Frame.createFrameIfPresent(mpegStream)) != null) {
+           if(f instanceof ID3v2Frame) {
+               ID3v2Frame id3F = (ID3v2Frame)f;
+               if (id3F.getMajorVersion() == 4) {
+                   v24 = new ID3v24Handler(id3F);
+               } else if(id3F.getMajorVersion() == 3) {
+                   v23 = new ID3v23Handler(id3F);
+               } else if(id3F.getMajorVersion() == 2) {
+                   v22 = new ID3v22Handler(id3F);
+               }
+           }
+       }
+
+        // Now iterate over all audio frames in the file
+        AudioFrame frame = mpegStream.nextFrame();
+        float duration = 0;
+        while (frame != null)
+        {
+            duration += frame.getDuration();
+            if (firstAudio == null)
+            {
+                firstAudio = frame;
+            }
+            mpegStream.skipFrame();
+            frame = mpegStream.nextFrame();
+        }
+
+       // ID3v1 tags live at the end of the file
+       // Lyrics live just before ID3v1, at the end of the file
+       // Search for both (handlers seek to the end for us)
+       lyrics = new LyricsHandler(tailStream.getTail());
+       v1 = lyrics.id3v1;
+
+       // Go in order of preference
+       // Currently, that's newest to oldest
+       List<ID3Tags> tags = new ArrayList<ID3Tags>();
+
+       if(v24 != null && v24.getTagsPresent()) {
+          tags.add(v24);
+       }
+       if(v23 != null && v23.getTagsPresent()) {
+          tags.add(v23);
+       }
+       if(v22 != null && v22.getTagsPresent()) {
+          tags.add(v22);
+       }
+       if(v1 != null && v1.getTagsPresent()) {
+          tags.add(v1);
+       }
+       
+       ID3TagsAndAudio ret = new ID3TagsAndAudio();
+       ret.audio = firstAudio;
+       ret.lyrics = lyrics;
+       ret.tags = tags.toArray(new ID3Tags[tags.size()]);
+       ret.duration = duration;
+       return ret;
+    }
+
+    protected static class ID3TagsAndAudio {
+        private ID3Tags[] tags;
+        private AudioFrame audio;
+        private LyricsHandler lyrics;
+        private float duration;
+    }
+
+}

Added: tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/main/java/org/apache/tika/parser/mp3/MpegStream.java
URL: http://svn.apache.org/viewvc/tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/main/java/org/apache/tika/parser/mp3/MpegStream.java?rev=1722029&view=auto
==============================================================================
--- tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/main/java/org/apache/tika/parser/mp3/MpegStream.java (added)
+++ tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/main/java/org/apache/tika/parser/mp3/MpegStream.java Mon Dec 28 23:22:46 2015
@@ -0,0 +1,469 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.mp3;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.PushbackInputStream;
+
+/**
+ * <p>
+ * A specialized stream class which can be used to extract single frames of MPEG
+ * audio files.
+ * </p>
+ * <p>
+ * Instances of this class are constructed with an underlying stream which
+ * should point to an audio file. Read operations are possible in the usual way.
+ * However, there are special methods for searching and extracting headers of
+ * MPEG frames. Some meta information of frames can be queried.
+ * </p>
+ */
+class MpegStream extends PushbackInputStream
+{
+    /** Bit rate table for MPEG V1, layer 1. */
+    private static final int[] BIT_RATE_MPEG1_L1 = {
+            0, 32000, 64000, 96000, 128000, 160000, 192000, 224000, 256000,
+            288000, 320000, 352000, 384000, 416000, 448000
+    };
+
+    /** Bit rate table for MPEG V1, layer 2. */
+    private static final int[] BIT_RATE_MPEG1_L2 = {
+            0, 32000, 48000, 56000, 64000, 80000, 96000, 112000, 128000,
+            160000, 192000, 224000, 256000, 320000, 384000
+    };
+
+    /** Bit rate table for MPEG V1, layer 3. */
+    private static final int[] BIT_RATE_MPEG1_L3 = {
+            0, 32000, 40000, 48000, 56000, 64000, 80000, 96000, 112000, 128000,
+            160000, 192000, 224000, 256000, 320000
+    };
+
+    /** Bit rate table for MPEG V2/V2.5, layer 1. */
+    private static final int[] BIT_RATE_MPEG2_L1 = {
+            0, 32000, 48000, 56000, 64000, 80000, 96000, 112000, 128000,
+            144000, 160000, 176000, 192000, 224000, 256000
+    };
+
+    /** Bit rate table for MPEG V2/V2.5, layer 2 and 3. */
+    private static final int[] BIT_RATE_MPEG2_L2 = {
+            0, 8000, 16000, 24000, 32000, 40000, 48000, 56000, 64000, 80000,
+            96000, 112000, 128000, 144000, 160000
+    };
+
+    /** Sample rate table for MPEG V1. */
+    private static final int[] SAMPLE_RATE_MPEG1 = {
+            44100, 48000, 32000
+    };
+
+    /** Sample rate table for MPEG V2. */
+    private static final int[] SAMPLE_RATE_MPEG2 = {
+            22050, 24000, 16000
+    };
+
+    /** Sample rate table for MPEG V2.5. */
+    private static final int[] SAMPLE_RATE_MPEG2_5 = {
+            11025, 12000, 8000
+    };
+
+    /** Sample rate table for all MPEG versions. */
+    private static final int[][] SAMPLE_RATE = createSampleRateTable();
+
+    /** Constant for the number of samples for a layer 1 frame. */
+    private static final int SAMPLE_COUNT_L1 = 384;
+
+    /** Constant for the number of samples for a layer 2 or 3 frame. */
+    private static final int SAMPLE_COUNT_L2 = 1152;
+
+    /** Constant for the size of an MPEG frame header in bytes. */
+    private static final int HEADER_SIZE = 4;
+
+    /** The current MPEG header. */
+    private AudioFrame currentHeader;
+
+    /** A flag whether the end of the stream is reached. */
+    private boolean endOfStream;
+
+    /**
+     * Creates a new instance of {@code MpegStream} and initializes it with the
+     * underlying stream.
+     * 
+     * @param in the underlying audio stream
+     */
+    public MpegStream(InputStream in)
+    {
+        super(in, 2 * HEADER_SIZE);
+    }
+
+    /**
+     * Searches for the next MPEG frame header from the current stream position
+     * on. This method advances the underlying input stream until it finds a
+     * valid frame header or the end of the stream is reached. In the former
+     * case a corresponding {@code AudioFrame} object is created. In the latter
+     * case there are no more headers, so the end of the stream is probably
+     * reached.
+     * 
+     * @return the next {@code AudioFrame} or <b>null</b>
+     * @throws IOException if an IO error occurs
+     */
+    public AudioFrame nextFrame() throws IOException
+    {
+        AudioFrame frame = null;
+        while (!endOfStream && frame == null)
+        {
+            findFrameSyncByte();
+            if (!endOfStream)
+            {
+                HeaderBitField headerField = createHeaderField();
+                if (!endOfStream)
+                {
+                    frame = createHeader(headerField);
+                    if (frame == null)
+                    {
+                        pushBack(headerField);
+                    }
+                }
+            }
+        }
+
+        currentHeader = frame;
+        return frame;
+    }
+
+    /**
+     * Skips the current MPEG frame. This method can be called after a valid
+     * MPEG header has been retrieved using {@code nextFrame()}. In this case
+     * the underlying stream is advanced to the end of the associated MPEG
+     * frame. Otherwise, this method has no effect. The return value indicates
+     * whether a frame could be skipped.
+     * 
+     * @return <b>true</b> if a frame could be skipped, <b>false</b> otherwise
+     * @throws IOException if an IO error occurs
+     */
+    public boolean skipFrame() throws IOException
+    {
+        if (currentHeader != null)
+        {
+            skipStream(in, currentHeader.getLength() - HEADER_SIZE);
+            currentHeader = null;
+            return true;
+        }
+        return false;
+    }
+
+    /**
+     * Advances the underlying stream until the first byte of frame sync is
+     * found.
+     * 
+     * @throws IOException if an error occurs
+     */
+    private void findFrameSyncByte() throws IOException
+    {
+        boolean found = false;
+        while (!found && !endOfStream)
+        {
+            if (nextByte() == 0xFF)
+            {
+                found = true;
+            }
+        }
+    }
+
+    /**
+     * Creates a bit field for the MPEG frame header.
+     * 
+     * @return the bit field
+     * @throws IOException if an error occurs
+     */
+    private HeaderBitField createHeaderField() throws IOException
+    {
+        HeaderBitField field = new HeaderBitField();
+        field.add(nextByte());
+        field.add(nextByte());
+        field.add(nextByte());
+        return field;
+    }
+
+    /**
+     * Creates an {@code AudioFrame} object based on the given header field. If
+     * the header field contains invalid values, result is <b>null</b>.
+     * 
+     * @param bits the header bit field
+     * @return the {@code AudioFrame}
+     */
+    private AudioFrame createHeader(HeaderBitField bits)
+    {
+        if (bits.get(21, 23) != 7)
+        {
+            return null;
+        }
+
+        int mpegVer = bits.get(19, 20);
+        int layer = bits.get(17, 18);
+        int bitRateCode = bits.get(12, 15);
+        int sampleRateCode = bits.get(10, 11);
+        int padding = bits.get(9);
+
+        if (mpegVer == 1 || layer == 0 || bitRateCode == 0 || bitRateCode == 15
+                || sampleRateCode == 3)
+        {
+            // invalid header values
+            return null;
+        }
+
+        int bitRate = calculateBitRate(mpegVer, layer, bitRateCode);
+        int sampleRate = calculateSampleRate(mpegVer, sampleRateCode);
+        int length = calculateFrameLength(layer, bitRate, sampleRate, padding);
+        float duration = calculateDuration(layer, sampleRate);
+        int channels = calculateChannels(bits.get(6, 7));
+        return new AudioFrame(mpegVer, layer, bitRate, sampleRate, channels,
+                length, duration);
+    }
+
+    /**
+     * Reads the next byte.
+     * 
+     * @return the next byte, or 0 once the end of the stream has been reached
+     * @throws IOException if an error occurs
+     */
+    private int nextByte() throws IOException
+    {
+        int result = 0;
+        if (!endOfStream)
+        {
+            result = read();
+            if (result == -1)
+            {
+                endOfStream = true;
+            }
+        }
+        return endOfStream ? 0 : result;
+    }
+
+    /**
+     * Pushes the given header field back in the stream so that the bytes are
+     * read again. This method is called if an invalid header was detected. Then
+     * search has to continue at the next byte after the frame sync byte.
+     * 
+     * @param field the header bit field with the invalid frame header
+     * @throws IOException if an error occurs
+     */
+    private void pushBack(HeaderBitField field) throws IOException
+    {
+        unread(field.toArray());
+    }
+
+    /**
+     * Skips the given number of bytes from the specified input stream. When
+     * skip() makes no progress, a single read() probes for the end of stream.
+     * @param in the input stream
+     * @param count the number of bytes to skip
+     * @throws IOException if an IO error occurs
+     */
+    private static void skipStream(InputStream in, long count)
+            throws IOException
+    {
+        long size = count;
+        while (size > 0)
+        {
+            long skipped = in.skip(size);
+            if (skipped <= 0)
+            {
+                skipped = (in.read() == -1) ? size : 1;
+            }
+            size -= skipped;
+        }
+    }
+    
+    /**
+     * Calculates the bit rate based on the given parameters.
+     * 
+     * @param mpegVer the MPEG version
+     * @param layer the layer
+     * @param code the code for the bit rate
+     * @return the bit rate in bits per second
+     */
+    private static int calculateBitRate(int mpegVer, int layer, int code)
+    {
+        int[] arr = null;
+
+        if (mpegVer == AudioFrame.MPEG_V1)
+        {
+            switch (layer)
+            {
+            case AudioFrame.LAYER_1:
+                arr = BIT_RATE_MPEG1_L1;
+                break;
+            case AudioFrame.LAYER_2:
+                arr = BIT_RATE_MPEG1_L2;
+                break;
+            case AudioFrame.LAYER_3:
+                arr = BIT_RATE_MPEG1_L3;
+                break;
+            }
+        }
+        else
+        {
+            if (layer == AudioFrame.LAYER_1)
+            {
+                arr = BIT_RATE_MPEG2_L1;
+            }
+            else
+            {
+                arr = BIT_RATE_MPEG2_L2;
+            }
+        }
+        return arr[code];
+    }
+
+    /**
+     * Calculates the sample rate based on the given parameters.
+     * 
+     * @param mpegVer the MPEG version
+     * @param code the code for the sample rate
+     * @return the sample rate in samples per second
+     */
+    private static int calculateSampleRate(int mpegVer, int code)
+    {
+        return SAMPLE_RATE[mpegVer][code];
+    }
+
+    /**
+     * Calculates the length of an MPEG frame based on the given parameters.
+     * 
+     * @param layer the layer
+     * @param bitRate the bit rate
+     * @param sampleRate the sample rate
+     * @param padding the padding flag
+     * @return the length of the frame in bytes
+     */
+    private static int calculateFrameLength(int layer, int bitRate,
+            int sampleRate, int padding)
+    {
+        if (layer == AudioFrame.LAYER_1)
+        {
+            return (12 * bitRate / sampleRate + padding) * 4;
+        }
+        else
+        {
+            return 144 * bitRate / sampleRate + padding;
+        }
+    }
+
+    /**
+     * Calculates the duration of a MPEG frame based on the given parameters.
+     * 
+     * @param layer the layer
+     * @param sampleRate the sample rate
+     * @return the duration of this frame in milliseconds
+     */
+    private static float calculateDuration(int layer, int sampleRate)
+    {
+        int sampleCount =
+                (layer == AudioFrame.LAYER_1) ? SAMPLE_COUNT_L1
+                        : SAMPLE_COUNT_L2;
+        return (1000.0f / sampleRate) * sampleCount;
+    }
+
+    /**
+     * Calculates the number of channels based on the given parameters.
+     * 
+     * @param chan the code for the channels (3 yields one channel, lower codes two)
+     * @return the number of channels
+     */
+    private static int calculateChannels(int chan)
+    {
+        return chan < 3 ? 2 : 1;
+    }
+
+    /**
+     * Creates the complete array for the sample rate mapping.
+     * 
+     * @return the table for the sample rates
+     */
+    private static int[][] createSampleRateTable()
+    {
+        int[][] arr = new int[4][];
+        arr[AudioFrame.MPEG_V1] = SAMPLE_RATE_MPEG1;
+        arr[AudioFrame.MPEG_V2] = SAMPLE_RATE_MPEG2;
+        arr[AudioFrame.MPEG_V2_5] = SAMPLE_RATE_MPEG2_5;
+        return arr;
+    }
+
+    /**
+     * A class representing the bit field of an MPEG header. It allows
+     * convenient access to specific bit groups.
+     */
+    private static class HeaderBitField
+    {
+        /** The internal value. */
+        private int value;
+
+        /**
+         * Adds a byte to this field.
+         * 
+         * @param b the byte to be added
+         */
+        public void add(int b)
+        {
+            value <<= 8;
+            value |= b;
+        }
+
+        /**
+         * Returns the value of the bit group from the given start and end
+         * index. E.g. ''from'' = 0, ''to'' = 3 will return the value of the
+         * lowest (least significant) 4 bits.
+         * 
+         * @param from the from index
+         * @param to the to index
+         * @return the value of this group of bits
+         */
+        public int get(int from, int to)
+        {
+            int shiftVal = value >> from;
+            int mask = (1 << (to - from + 1)) - 1;
+            return shiftVal & mask;
+        }
+
+        /**
+         * Returns the value of the bit with the given index. The bit index is
+         * 0-based. Result is either 0 or 1, depending on the value of this bit.
+         * 
+         * @param bit the bit index
+         * @return the value of this bit
+         */
+        public int get(int bit)
+        {
+            return get(bit, bit);
+        }
+
+        /**
+         * Returns the internal value of this field as an array. The array
+         * contains 3 bytes.
+         * 
+         * @return the internal value of this field as byte array
+         */
+        public byte[] toArray()
+        {
+            byte[] result = new byte[3];
+            result[0] = (byte) get(16, 23);
+            result[1] = (byte) get(8, 15);
+            result[2] = (byte) get(0, 7);
+            return result;
+        }
+    }
+}

Added: tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/main/java/org/apache/tika/parser/mp4/DirectFileReadDataSource.java
URL: http://svn.apache.org/viewvc/tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/main/java/org/apache/tika/parser/mp4/DirectFileReadDataSource.java?rev=1722029&view=auto
==============================================================================
--- tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/main/java/org/apache/tika/parser/mp4/DirectFileReadDataSource.java (added)
+++ tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/main/java/org/apache/tika/parser/mp4/DirectFileReadDataSource.java Mon Dec 28 23:22:46 2015
@@ -0,0 +1,100 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.mp4;
+
+import com.googlecode.mp4parser.DataSource;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.RandomAccessFile;
+import java.nio.ByteBuffer;
+import java.nio.channels.WritableByteChannel;
+
+import static com.googlecode.mp4parser.util.CastUtils.l2i;
+
+/**
+ * A {@link DataSource} implementation that relies on direct reads from a {@link RandomAccessFile}.
+ * It should be slower than {@link com.googlecode.mp4parser.FileDataSourceImpl} but does not incur the implicit file locks of
+ * memory mapped I/O on some JVMs. This implementation allows for a more controlled deletion of files
+ * and might be preferred when working with temporary files.
+ * @see <a href="http://bugs.java.com/view_bug.do?bug_id=4724038">JDK-4724038 : (fs) Add unmap method to MappedByteBuffer</a>
+ * @see <a href="http://bugs.java.com/view_bug.do?bug_id=6359560">JDK-6359560 : (fs) File.deleteOnExit() doesn't work when MappedByteBuffer exists (win)</a>
+ */
+public class DirectFileReadDataSource implements DataSource {
+
+    private static final int TRANSFER_SIZE = 8192;
+    private final RandomAccessFile raf;
+
+    public DirectFileReadDataSource(File f) throws IOException {
+        this.raf = new RandomAccessFile(f, "r");
+    }
+
+    /** Fills the buffer from the current file position; returns the bytes read, or -1 if already at EOF. */
+    public int read(ByteBuffer byteBuffer) throws IOException {
+        int len = byteBuffer.remaining();
+        int totalRead = 0;
+        byte[] buf = new byte[TRANSFER_SIZE];
+        while (totalRead < len) {
+            int bytesRead = raf.read(buf, 0, Math.min(len - totalRead, TRANSFER_SIZE));
+            if (bytesRead < 0) {
+                return (totalRead == 0) ? -1 : totalRead;
+            }
+            byteBuffer.put(buf, 0, bytesRead);
+            totalRead += bytesRead;
+        }
+        return totalRead;
+    }
+
+    /** Single-call variant of {@link #read(ByteBuffer)}; returns -1 at EOF and then leaves the buffer untouched. */
+    public int readAllInOnce(ByteBuffer byteBuffer) throws IOException {
+        byte[] buf = new byte[byteBuffer.remaining()];
+        int read = raf.read(buf);
+        if (read > 0) {
+            byteBuffer.put(buf, 0, read);
+        }
+        return read;
+    }
+
+    public long size() throws IOException {
+        return raf.length();
+    }
+
+    public long position() throws IOException {
+        return raf.getFilePointer();
+    }
+
+    public void position(long nuPos) throws IOException {
+        raf.seek(nuPos);
+    }
+
+    /** Copies {@code count} bytes at {@code position} to {@code target} via {@link #map}; moves the file pointer. */
+    public long transferTo(long position, long count, WritableByteChannel target) throws IOException {
+        return target.write(map(position, count));
+    }
+
+    /** Reads {@code size} bytes starting at {@code startPosition} into a heap buffer (no memory mapping). */
+    public ByteBuffer map(long startPosition, long size) throws IOException {
+        raf.seek(startPosition);
+        byte[] payload = new byte[l2i(size)];
+        raf.readFully(payload);
+        return ByteBuffer.wrap(payload);
+    }
+
+    public void close() throws IOException {
+        raf.close();
+    }
+}

Added: tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/main/java/org/apache/tika/parser/mp4/MP4Parser.java
URL: http://svn.apache.org/viewvc/tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/main/java/org/apache/tika/parser/mp4/MP4Parser.java?rev=1722029&view=auto
==============================================================================
--- tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/main/java/org/apache/tika/parser/mp4/MP4Parser.java (added)
+++ tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/main/java/org/apache/tika/parser/mp4/MP4Parser.java Mon Dec 28 23:22:46 2015
@@ -0,0 +1,325 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.mp4;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.text.DecimalFormat;
+import java.text.NumberFormat;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Locale;
+import java.util.Map;
+import java.util.Set;
+
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.io.TemporaryResources;
+import org.apache.tika.io.TikaInputStream;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.Property;
+import org.apache.tika.metadata.TikaCoreProperties;
+import org.apache.tika.metadata.XMP;
+import org.apache.tika.metadata.XMPDM;
+import org.apache.tika.mime.MediaType;
+import org.apache.tika.parser.AbstractParser;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.sax.XHTMLContentHandler;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.SAXException;
+
+import com.coremedia.iso.IsoFile;
+import com.coremedia.iso.boxes.Box;
+import com.coremedia.iso.boxes.Container;
+import com.coremedia.iso.boxes.FileTypeBox;
+import com.coremedia.iso.boxes.MetaBox;
+import com.coremedia.iso.boxes.MovieBox;
+import com.coremedia.iso.boxes.MovieHeaderBox;
+import com.coremedia.iso.boxes.SampleDescriptionBox;
+import com.coremedia.iso.boxes.SampleTableBox;
+import com.coremedia.iso.boxes.TrackBox;
+import com.coremedia.iso.boxes.TrackHeaderBox;
+import com.coremedia.iso.boxes.UserDataBox;
+import com.coremedia.iso.boxes.apple.AppleItemListBox;
+import com.coremedia.iso.boxes.sampleentry.AudioSampleEntry;
+import com.googlecode.mp4parser.boxes.apple.AppleAlbumBox;
+import com.googlecode.mp4parser.boxes.apple.AppleArtistBox;
+import com.googlecode.mp4parser.boxes.apple.AppleArtist2Box;
+import com.googlecode.mp4parser.boxes.apple.AppleCommentBox;
+import com.googlecode.mp4parser.boxes.apple.AppleCompilationBox;
+import com.googlecode.mp4parser.boxes.apple.AppleDiskNumberBox;
+import com.googlecode.mp4parser.boxes.apple.AppleEncoderBox;
+import com.googlecode.mp4parser.boxes.apple.AppleGenreBox;
+import com.googlecode.mp4parser.boxes.apple.AppleNameBox;
+import com.googlecode.mp4parser.boxes.apple.AppleRecordingYear2Box;
+import com.googlecode.mp4parser.boxes.apple.AppleTrackAuthorBox;
+import com.googlecode.mp4parser.boxes.apple.AppleTrackNumberBox;
+import com.googlecode.mp4parser.boxes.apple.Utf8AppleDataBox;
+
+/**
+ * Parser for the MP4 media container format, as well as the older
+ *  QuickTime format that MP4 is based on.
+ * 
+ * This uses the MP4Parser project from http://code.google.com/p/mp4parser/
+ *  to do the underlying parsing
+ */
+public class MP4Parser extends AbstractParser {
+    /** Serial version UID */
+    private static final long serialVersionUID = 84011216792285L;
+    /**
+     * Formats the duration in seconds with one or two decimal places, using
+     * the root locale so that the output does not depend on the platform.
+     * NOTE: DecimalFormat instances are not synchronized, and this one is
+     * shared by every user of this class.
+     * TODO Replace this with a 2dp Duration Property Converter
+     */
+    private static final DecimalFormat DURATION_FORMAT = 
+            (DecimalFormat)NumberFormat.getNumberInstance(Locale.ROOT); 
+    static {
+        DURATION_FORMAT.applyPattern("0.0#");
+    }
+    
+    // Maps each supported media type to the major brands (as found in the
+    //  file type box) that identify it. Types with an empty list are never
+    //  selected by brand, they are only reported as supported / used as fallback
+    // Ensure this stays in Sync with the entries in tika-mimetypes.xml
+    private static final Map<MediaType,List<String>> typesMap = new HashMap<MediaType, List<String>>();
+    static {
+       // All types should be 4 bytes long, space padded as needed
+       typesMap.put(MediaType.audio("mp4"), Arrays.asList(
+             "M4A ", "M4B ", "F4A ", "F4B "));
+       typesMap.put(MediaType.video("3gpp"), Arrays.asList(
+             "3ge6", "3ge7", "3gg6", "3gp1", "3gp2", "3gp3", "3gp4", "3gp5", "3gp6", "3gs7"));
+       typesMap.put(MediaType.video("3gpp2"), Arrays.asList(
+             "3g2a", "3g2b", "3g2c"));
+       typesMap.put(MediaType.video("mp4"), Arrays.asList(
+             "mp41", "mp42"));
+       typesMap.put(MediaType.video("x-m4v"), Arrays.asList(
+             "M4V ", "M4VH", "M4VP"));
+       
+       typesMap.put(MediaType.video("quicktime"), Collections.<String>emptyList());
+       typesMap.put(MediaType.application("mp4"), Collections.<String>emptyList());
+    }
+
+    // Read-only view of the keys of typesMap; must be initialised after the
+    //  static block above has populated the map
+    private static final Set<MediaType> SUPPORTED_TYPES =
+       Collections.unmodifiableSet(typesMap.keySet());
+
+    /**
+     * Returns the media types handled by this parser, which are the keys of
+     * the brand lookup table. The parse context is not consulted.
+     */
+    public Set<MediaType> getSupportedTypes(ParseContext context) {
+        return SUPPORTED_TYPES;
+    }
+
+
+    /**
+     * Extracts metadata (and the iTunes text tags, as paragraphs) from an
+     * MP4 / QuickTime stream.
+     * <p>
+     * The stream is spooled to a temporary file where needed, as the
+     * underlying library works on files. The content type is derived from
+     * the major brand of the file type box; files lacking that box are
+     * reported as <code>video/quicktime</code>. If the file has no movie
+     * box, only the content type is set and no SAX events are emitted.
+     * <p>
+     * Only the first track is inspected. Tracks without a header box or
+     * without a sample table are tolerated, and a movie header with a zero
+     * timescale yields no duration rather than a non-finite one.
+     *
+     * @param stream the document to parse
+     * @param handler receives the XHTML SAX events
+     * @param metadata receives the extracted metadata
+     * @param context the parse context (not consulted here)
+     * @throws IOException if the document cannot be read
+     * @throws SAXException if the handler fails to process an event
+     * @throws TikaException if the document cannot be parsed
+     */
+    public void parse(
+            InputStream stream, ContentHandler handler,
+            Metadata metadata, ParseContext context)
+            throws IOException, SAXException, TikaException {
+        IsoFile isoFile;
+        
+        // The MP4Parser library accepts either a File, or a byte array
+        // As MP4 video files are typically large, always use a file to
+        //  avoid OOMs that may occur with in-memory buffering
+        TemporaryResources tmp = new TemporaryResources();
+        TikaInputStream tstream = TikaInputStream.get(stream, tmp);
+        try {
+            isoFile = new IsoFile(new DirectFileReadDataSource(tstream.getFile()));
+            tmp.addResource(isoFile);
+
+            // Grab the file type box
+            FileTypeBox fileType = getOrNull(isoFile, FileTypeBox.class);
+            if (fileType != null) {
+               // Identify the type
+               MediaType type = MediaType.application("mp4");
+               for (MediaType t : typesMap.keySet()) {
+                  if (typesMap.get(t).contains(fileType.getMajorBrand())) {
+                     type = t;
+                     break;
+                  }
+               }
+               metadata.set(Metadata.CONTENT_TYPE, type.toString());
+
+               if (type.getType().equals("audio")) {
+                  metadata.set(XMPDM.AUDIO_COMPRESSOR, fileType.getMajorBrand().trim());
+               }
+            } else {
+               // Some older QuickTime files lack the FileType
+               metadata.set(Metadata.CONTENT_TYPE, "video/quicktime");
+            }
+
+
+            // Get the main MOOV box
+            MovieBox moov = getOrNull(isoFile, MovieBox.class);
+            if (moov == null) {
+               // Bail out
+               return;
+            }
+
+
+            XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
+            xhtml.startDocument();
+
+
+            // Pull out some information from the header box
+            MovieHeaderBox mHeader = getOrNull(moov, MovieHeaderBox.class);
+            if (mHeader != null) {
+               // Get the creation and modification dates
+               metadata.set(Metadata.CREATION_DATE, mHeader.getCreationTime());
+               metadata.set(TikaCoreProperties.MODIFIED, mHeader.getModificationTime());
+
+               // Get the duration. A zero timescale (corrupt header) would give
+               //  a non-finite value, so no duration is reported in that case
+               long timescale = mHeader.getTimescale();
+               if (timescale != 0) {
+                  double durationSeconds = ((double)mHeader.getDuration()) / timescale;
+                  // DecimalFormat is not thread safe, and the shared instance may be
+                  //  in use by a parallel parse, so format with a private copy
+                  DecimalFormat durationFormat = (DecimalFormat)DURATION_FORMAT.clone();
+                  metadata.set(XMPDM.DURATION, durationFormat.format(durationSeconds));
+               }
+
+               // The timescale is normally the sampling rate
+               metadata.set(XMPDM.AUDIO_SAMPLE_RATE, (int)timescale);
+            }
+
+
+            // Get some more information from the track header
+            // TODO Decide how to handle multiple tracks
+            List<TrackBox> tb = moov.getBoxes(TrackBox.class);
+            if (tb.size() > 0) {
+               TrackBox track = tb.get(0);
+
+               // The track header may be missing from damaged files
+               TrackHeaderBox header = track.getTrackHeaderBox();
+               if (header != null) {
+                  // Get the creation and modification dates
+                  metadata.set(TikaCoreProperties.CREATED, header.getCreationTime());
+                  metadata.set(TikaCoreProperties.MODIFIED, header.getModificationTime());
+
+                  // Get the video width and height
+                  metadata.set(Metadata.IMAGE_WIDTH,  (int)header.getWidth());
+                  metadata.set(Metadata.IMAGE_LENGTH, (int)header.getHeight());
+               }
+
+               // Get the sample information, if the track has a sample table
+               SampleTableBox samples = track.getSampleTableBox();
+               SampleDescriptionBox sampleDesc =
+                     (samples == null) ? null : samples.getSampleDescriptionBox();
+               if (sampleDesc != null) {
+                  // Look for the first Audio Sample, if present
+                  AudioSampleEntry sample = getOrNull(sampleDesc, AudioSampleEntry.class);
+                  if (sample != null) {
+                     XMPDM.ChannelTypePropertyConverter.convertAndSet(metadata, sample.getChannelCount());
+                     //metadata.set(XMPDM.AUDIO_SAMPLE_TYPE, sample.getSampleSize());    // TODO Num -> Type mapping
+                     metadata.set(XMPDM.AUDIO_SAMPLE_RATE, (int)sample.getSampleRate());
+                     //metadata.set(XMPDM.AUDIO_, sample.getSamplesPerPacket());
+                     //metadata.set(XMPDM.AUDIO_, sample.getBytesPerSample());
+                  }
+               }
+            }
+
+            // Get metadata from the User Data Box
+            UserDataBox userData = getOrNull(moov, UserDataBox.class);
+            if (userData != null) {
+               MetaBox meta = getOrNull(userData, MetaBox.class);
+
+               // Check for iTunes Metadata
+               // See http://atomicparsley.sourceforge.net/mpeg-4files.html and
+               //  http://code.google.com/p/mp4v2/wiki/iTunesMetadata for more on these
+               // (getOrNull copes with a missing meta box)
+               AppleItemListBox apple = getOrNull(meta, AppleItemListBox.class);
+               if (apple != null) {
+                  // Title
+                  AppleNameBox title = getOrNull(apple, AppleNameBox.class);
+                  addMetadata(TikaCoreProperties.TITLE, metadata, title);
+
+                  // Artist
+                  AppleArtistBox artist = getOrNull(apple, AppleArtistBox.class);
+                  addMetadata(TikaCoreProperties.CREATOR, metadata, artist);
+                  addMetadata(XMPDM.ARTIST, metadata, artist);
+
+                  // Album Artist
+                  AppleArtist2Box artist2 = getOrNull(apple, AppleArtist2Box.class);
+                  addMetadata(XMPDM.ALBUM_ARTIST, metadata, artist2);
+
+                  // Album
+                  AppleAlbumBox album = getOrNull(apple, AppleAlbumBox.class);
+                  addMetadata(XMPDM.ALBUM, metadata, album);
+
+                  // Composer
+                  AppleTrackAuthorBox composer = getOrNull(apple, AppleTrackAuthorBox.class);
+                  addMetadata(XMPDM.COMPOSER, metadata, composer);
+
+                  // Genre
+                  AppleGenreBox genre = getOrNull(apple, AppleGenreBox.class);
+                  addMetadata(XMPDM.GENRE, metadata, genre);
+
+                  // Year
+                  AppleRecordingYear2Box year = getOrNull(apple, AppleRecordingYear2Box.class);
+                  if (year != null) {
+                      metadata.set(XMPDM.RELEASE_DATE, year.getValue());
+                  }
+
+                  // Track number
+                  AppleTrackNumberBox trackNum = getOrNull(apple, AppleTrackNumberBox.class);
+                  if (trackNum != null) {
+                     metadata.set(XMPDM.TRACK_NUMBER, trackNum.getA());
+                     //metadata.set(XMPDM.NUMBER_OF_TRACKS, trackNum.getB()); // TODO
+                  }
+
+                  // Disc number
+                  AppleDiskNumberBox discNum = getOrNull(apple, AppleDiskNumberBox.class);
+                  if (discNum != null) {
+                     metadata.set(XMPDM.DISC_NUMBER, discNum.getA());
+                  }
+
+                  // Compilation
+                  AppleCompilationBox compilation = getOrNull(apple, AppleCompilationBox.class);
+                  if (compilation != null) {
+                      metadata.set(XMPDM.COMPILATION, (int)compilation.getValue());
+                  }
+
+                  // Comment
+                  AppleCommentBox comment = getOrNull(apple, AppleCommentBox.class);
+                  addMetadata(XMPDM.LOG_COMMENT, metadata, comment);
+
+                  // Encoder
+                  AppleEncoderBox encoder = getOrNull(apple, AppleEncoderBox.class);
+                  if (encoder != null) {
+                      metadata.set(XMP.CREATOR_TOOL, encoder.getValue());
+                  }
+
+
+                  // As text
+                  for (Box box : apple.getBoxes()) {
+                     if (box instanceof Utf8AppleDataBox) {
+                        xhtml.element("p", ((Utf8AppleDataBox)box).getValue());
+                     }
+                  }
+               }
+
+               // TODO Check for other kinds too
+            }
+
+            // All done
+            xhtml.endDocument();
+
+        } finally {
+            // Closes the IsoFile and removes any temporary file
+            tmp.dispose();
+        }
+
+    }
+    
+    /**
+     * Appends the value of the given box to the metadata under a plain
+     * string key. Does nothing when the box is absent.
+     */
+    private static void addMetadata(String key, Metadata m, Utf8AppleDataBox metadata) {
+       if (metadata == null) {
+          return;
+       }
+       String value = metadata.getValue();
+       m.add(key, value);
+    }
+    /**
+     * Sets the given property to the value of the given box. Does nothing
+     * when the box is absent.
+     */
+    private static void addMetadata(Property prop, Metadata m, Utf8AppleDataBox metadata) {
+       if (metadata == null) {
+          return;
+       }
+       String value = metadata.getValue();
+       m.set(prop, value);
+    }
+    
+    /**
+     * Returns the first direct child box of the requested type.
+     *
+     * @param box the container to search, may be <code>null</code>
+     * @param clazz the type of box wanted
+     * @return the first matching box, or <code>null</code> if the container
+     *         is <code>null</code> or holds no box of that type
+     */
+    private static <T extends Box> T getOrNull(Container box, Class<T> clazz) {
+       if (box == null) {
+          return null;
+       }
+
+       List<T> matches = box.getBoxes(clazz);
+       return matches.isEmpty() ? null : matches.get(0);
+    }
+}

Added: tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/main/java/org/apache/tika/parser/video/FLVParser.java
URL: http://svn.apache.org/viewvc/tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/main/java/org/apache/tika/parser/video/FLVParser.java?rev=1722029&view=auto
==============================================================================
--- tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/main/java/org/apache/tika/parser/video/FLVParser.java (added)
+++ tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/main/java/org/apache/tika/parser/video/FLVParser.java Mon Dec 28 23:22:46 2015
@@ -0,0 +1,268 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.video;
+
+import java.io.ByteArrayInputStream;
+import java.io.DataInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Date;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Set;
+
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.mime.MediaType;
+import org.apache.tika.parser.AbstractParser;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.sax.XHTMLContentHandler;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.SAXException;
+
+import static java.nio.charset.StandardCharsets.UTF_8;
+
+/**
+ * <p>
+ * Parser for metadata contained in Flash Videos (.flv). Resources:
+ * http://osflash.org/flv and for AMF:
+ * http://download.macromedia.com/pub/labs/amf/amf0_spec_121207.pdf
+ * <p>
+ * This parser is capable of extracting the general metadata from header as well
+ * as embedded metadata.
+ * <p>
+ * Known keys for metadata (from file header):
+ * <ol>
+ * <li>hasVideo: true|false
+ * <li>hasAudio: true|false
+ * </ol>
+ * <p>
+ * In addition to the above values, metadata that is embedded in the
+ * actual stream will also be picked up. Usually there are keys like:
+ * hasKeyframes, lastkeyframetimestamp, audiocodecid, keyframes, filepositions,
+ * hasMetadata, audiosamplerate, videodatarate, metadatadate, videocodecid,
+ * metadatacreator, audiosize, hasVideo, height, audiosamplesize, framerate,
+ * hasCuePoints, width, cuePoints, lasttimestamp, canSeekToEnd, datasize,
+ * duration, videosize, filesize, audiodatarate, hasAudio, stereo, audiodelay
+ */
+public class FLVParser extends AbstractParser {
+
+    /** Serial version UID */
+    private static final long serialVersionUID = -8718013155719197679L;
+
+    /** Type of the tags whose body is parsed as AMF encoded metadata */
+    private static final int TYPE_METADATA = 0x12;
+    /** Bit of the header type flags signalling that audio tags are present (bit 2) */
+    private static final byte MASK_AUDIO = 4;
+    /** Bit of the header type flags signalling that video tags are present (bit 0) */
+    private static final byte MASK_VIDEO = 1;
+
+    private static final Set<MediaType> SUPPORTED_TYPES =
+        Collections.singleton(MediaType.video("x-flv"));
+
+    /**
+     * Returns the single media type handled by this parser,
+     * <code>video/x-flv</code>. The parse context is not consulted.
+     */
+    public Set<MediaType> getSupportedTypes(ParseContext context) {
+        return SUPPORTED_TYPES;
+    }
+
+    private long readUInt32(DataInputStream input) throws IOException {
+        return input.readInt() & 0xFFFFFFFFL;
+    }
+
+    /**
+     * Reads three bytes as a big-endian unsigned integer.
+     *
+     * @return the value, in the range 0 to 2^24-1
+     * @throws IOException if the stream cannot be read; in particular an
+     *         EOFException if it ends before all three bytes were read,
+     *         rather than folding the end-of-stream marker into the result
+     */
+    private int readUInt24(DataInputStream input) throws IOException {
+        int high = input.readUnsignedByte();
+        int middle = input.readUnsignedByte();
+        int low = input.readUnsignedByte();
+        return (high << 16) | (middle << 8) | low;
+    }
+
+    /**
+     * Decodes a single AMF value.
+     *
+     * @param input the stream to read from
+     * @param type the type marker of the value if the caller has already
+     *        consumed it, or -1 to have the marker read from the stream
+     * @return a Double (marker 0), Boolean (1), String (2), Map (3 and 8),
+     *         List (10) or Date (11), the text "UNDEFINED" for marker 13,
+     *         and <code>null</code> for every other marker, in which case
+     *         nothing beyond the marker is consumed
+     * @throws IOException if the stream ends early or cannot be read
+     */
+    private Object readAMFData(DataInputStream input, int type)
+            throws IOException {
+        final int marker = (type == -1) ? input.readUnsignedByte() : type;
+
+        if (marker == 0) {
+            return input.readDouble();
+        } else if (marker == 1) {
+            return input.readUnsignedByte() == 1;
+        } else if (marker == 2) {
+            return readAMFString(input);
+        } else if (marker == 3) {
+            return readAMFObject(input);
+        } else if (marker == 8) {
+            return readAMFEcmaArray(input);
+        } else if (marker == 10) {
+            return readAMFStrictArray(input);
+        } else if (marker == 11) {
+            final Date date = new Date((long) input.readDouble());
+            input.readShort(); // time zone, read and discarded
+            return date;
+        } else if (marker == 13) {
+            return "UNDEFINED";
+        }
+        return null;
+    }
+
+    private Object readAMFStrictArray(DataInputStream input) throws IOException {
+        long count = readUInt32(input);
+        ArrayList<Object> list = new ArrayList<Object>();
+        for (int i = 0; i < count; i++) {
+            list.add(readAMFData(input, -1));
+        }
+        return list;
+    }
+
+
+    private String readAMFString(DataInputStream input) throws IOException {
+        int size = input.readUnsignedShort();
+        byte[] chars = new byte[size];
+        input.readFully(chars);
+        return new String(chars, UTF_8);
+    }
+
+    private Object readAMFObject(DataInputStream input) throws IOException {
+        HashMap<String, Object> array = new HashMap<String, Object>();
+        while (true) {
+            String key = readAMFString(input);
+            int dataType = input.read();
+            if (dataType == 9) { // object end marker
+                break;
+            }
+            array.put(key, readAMFData(input, dataType));
+        }
+        return array;
+    }
+
+    /**
+     * Reads an associative array: an unsigned 32 bit entry count followed by
+     * key / value pairs and, normally, an object end marker (an empty key
+     * followed by the type marker 9).
+     * <p>
+     * The count is treated as an upper bound. Reading stops early if the
+     * end marker shows up first, so an over-stated count does not run into
+     * the data that follows. If all counted entries were read and the stream
+     * supports mark / reset, a directly following end marker is consumed as
+     * well, so that it is not mistaken for the end of an enclosing object.
+     * Streams lacking the marker are left untouched.
+     *
+     * @return the entries as a Map keyed by entry name
+     * @throws IOException if the stream ends early or cannot be read
+     */
+    private Object readAMFEcmaArray(DataInputStream input) throws IOException {
+        long size = readUInt32(input);
+        HashMap<String, Object> array = new HashMap<String, Object>();
+        boolean terminated = false;
+        for (long i = 0; i < size; i++) {
+            String key = readAMFString(input);
+            int dataType = input.read();
+            if (dataType == 9) { // object end marker, fewer entries than announced
+                terminated = true;
+                break;
+            }
+            array.put(key, readAMFData(input, dataType));
+        }
+
+        if (!terminated && input.markSupported()) {
+            // Swallow the trailing end marker (0x00 0x00 0x09) when present
+            input.mark(3);
+            boolean endMarker =
+                    input.read() == 0 && input.read() == 0 && input.read() == 9;
+            if (!endMarker) {
+                input.reset();
+            }
+        }
+        return array;
+    }
+
+    private boolean checkSignature(DataInputStream fis) throws IOException {
+        return fis.read() == 'F' && fis.read() == 'L' && fis.read() == 'V';
+    }
+
+    public void parse(
+            InputStream stream, ContentHandler handler,
+            Metadata metadata, ParseContext context)
+            throws IOException, SAXException, TikaException {
+        DataInputStream datainput = new DataInputStream(stream);
+        if (!checkSignature(datainput)) {
+            throw new TikaException("FLV signature not detected");
+        }
+
+        // header
+        int version = datainput.readUnsignedByte();
+        if (version != 1) {
+            // should be 1, perhaps this is not flv?
+            throw new TikaException("Unpexpected FLV version: " + version);
+        }
+
+        int typeFlags = datainput.readUnsignedByte();
+
+        long len = readUInt32(datainput);
+        if (len != 9) {
+            // we only know about format with header of 9 bytes
+            throw new TikaException("Unpexpected FLV header length: " + len);
+        }
+
+        long sizePrev = readUInt32(datainput);
+        if (sizePrev != 0) {
+            // should be 0, perhaps this is not flv?
+            throw new TikaException(
+                    "Unpexpected FLV first previous block size: " + sizePrev);
+        }
+
+        metadata.set(Metadata.CONTENT_TYPE, "video/x-flv");
+        metadata.set("hasVideo", Boolean.toString((typeFlags & MASK_VIDEO) != 0));
+        metadata.set("hasAudio", Boolean.toString((typeFlags & MASK_AUDIO) != 0));
+
+        XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
+        xhtml.startDocument();
+
+        // flv tag stream follows...
+        while (true) {
+            int type = datainput.read();
+            if (type == -1) {
+                // EOF
+                break;
+            }
+
+            int datalen = readUInt24(datainput); //body length
+            readUInt32(datainput); // timestamp
+            readUInt24(datainput); // streamid
+
+            if (type == TYPE_METADATA) {
+                // found metadata Tag, read content to buffer
+                byte[] metaBytes = new byte[datalen];
+                for (int readCount = 0; readCount < datalen;) {
+                    int r = stream.read(metaBytes, readCount, datalen - readCount);
+                    if(r!=-1) {
+                        readCount += r;
+
+                    } else {
+                        break;
+                    }
+                }
+
+                ByteArrayInputStream is = new ByteArrayInputStream(metaBytes);
+
+                DataInputStream dis = new DataInputStream(is);
+
+                Object data = null;
+
+                for (int i = 0; i < 2; i++) {
+                    data = readAMFData(dis, -1);
+                }
+
+                if (data instanceof Map) {
+                    // TODO if there are multiple metadata values with same key (in
+                    // separate AMF blocks, we currently loose previous values)
+                    Map<String, Object> extractedMetadata = (Map<String, Object>) data;
+                    for (Entry<String, Object> entry : extractedMetadata.entrySet()) {
+                        if (entry.getValue() == null) {
+                            continue;
+                        }
+                        metadata.set(entry.getKey(), entry.getValue().toString());
+                    }
+                }
+
+            } else {
+                // Tag was not metadata, skip over data we cannot handle
+                for (int i = 0; i < datalen; i++) {
+                    datainput.readByte();
+                }
+            }
+
+            sizePrev = readUInt32(datainput); // previous block size
+            if (sizePrev != datalen + 11) {
+                // file was corrupt or we could not parse it...
+                break;
+            }
+        }
+
+        xhtml.endDocument();
+    }
+
+}

Modified: tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/main/resources/META-INF/services/org.apache.tika.parser.Parser
URL: http://svn.apache.org/viewvc/tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/main/resources/META-INF/services/org.apache.tika.parser.Parser?rev=1722029&r1=1722028&r2=1722029&view=diff
==============================================================================
--- tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/main/resources/META-INF/services/org.apache.tika.parser.Parser (original)
+++ tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/main/resources/META-INF/services/org.apache.tika.parser.Parser Mon Dec 28 23:22:46 2015
@@ -14,5 +14,14 @@
 #  limitations under the License.
 
 
+org.apache.tika.parser.image.BPGParser
 org.apache.tika.parser.image.ImageParser
+org.apache.tika.parser.image.PSDParser
+org.apache.tika.parser.image.TiffParser
+org.apache.tika.parser.image.WebPParser
 org.apache.tika.parser.jpeg.JpegParser
+org.apache.tika.parser.audio.AudioParser
+org.apache.tika.parser.audio.MidiParser
+org.apache.tika.parser.mp3.Mp3Parser
+org.apache.tika.parser.mp4.MP4Parser
+org.apache.tika.parser.video.FLVParser

Added: tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/test/java/org/apache/tika/parser/audio/AudioParserTest.java
URL: http://svn.apache.org/viewvc/tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/test/java/org/apache/tika/parser/audio/AudioParserTest.java?rev=1722029&view=auto
==============================================================================
--- tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/test/java/org/apache/tika/parser/audio/AudioParserTest.java (added)
+++ tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/test/java/org/apache/tika/parser/audio/AudioParserTest.java Mon Dec 28 23:22:46 2015
@@ -0,0 +1,75 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.audio;
+
+import static org.junit.Assert.assertEquals;
+
+import org.apache.tika.Tika;
+import org.apache.tika.metadata.Metadata;
+import org.junit.Test;
+
+/**
+ * Checks the metadata reported for the sample WAV, AIFF and AU files. All
+ * three samples are expected to be 44.1kHz, stereo, 16 bit signed PCM and
+ * to yield no text.
+ */
+public class AudioParserTest {
+
+    @Test
+    public void testWAV() throws Exception {
+        assertPcmSample("/test-documents/testWAV.wav", "audio/x-wav");
+    }
+
+    @Test
+    public void testAIFF() throws Exception {
+        assertPcmSample("/test-documents/testAIFF.aif", "audio/x-aiff");
+    }
+
+    @Test
+    public void testAU() throws Exception {
+        assertPcmSample("/test-documents/testAU.au", "audio/basic");
+    }
+
+    /**
+     * Parses the given classpath resource and verifies the content type
+     * along with the audio properties shared by all the samples.
+     */
+    private static void assertPcmSample(String path, String expectedType)
+            throws Exception {
+        Metadata metadata = new Metadata();
+        String content = new Tika().parseToString(
+                AudioParserTest.class.getResourceAsStream(path), metadata);
+
+        assertEquals(expectedType, metadata.get(Metadata.CONTENT_TYPE));
+        assertEquals("44100.0", metadata.get("samplerate"));
+        assertEquals("2", metadata.get("channels"));
+        assertEquals("16", metadata.get("bits"));
+        assertEquals("PCM_SIGNED", metadata.get("encoding"));
+
+        assertEquals("", content);
+    }
+
+}

Added: tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/test/java/org/apache/tika/parser/audio/MidiParserTest.java
URL: http://svn.apache.org/viewvc/tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/test/java/org/apache/tika/parser/audio/MidiParserTest.java?rev=1722029&view=auto
==============================================================================
--- tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/test/java/org/apache/tika/parser/audio/MidiParserTest.java (added)
+++ tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/test/java/org/apache/tika/parser/audio/MidiParserTest.java Mon Dec 28 23:22:46 2015
@@ -0,0 +1,42 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.audio;
+
+import static org.junit.Assert.assertEquals;
+import static org.apache.tika.TikaTest.assertContains;
+
+import org.apache.tika.Tika;
+import org.apache.tika.metadata.Metadata;
+import org.junit.Test;
+
+/**
+ * Checks the metadata and text reported for the sample MIDI file.
+ */
+public class MidiParserTest {
+
+    @Test
+    public void testMID() throws Exception {
+        Metadata extracted = new Metadata();
+        Tika tika = new Tika();
+        String text = tika.parseToString(
+                MidiParserTest.class.getResourceAsStream("/test-documents/testMID.mid"),
+                extracted);
+
+        assertEquals("audio/midi", extracted.get(Metadata.CONTENT_TYPE));
+        assertEquals("2", extracted.get("tracks"));
+        assertEquals("0", extracted.get("patches"));
+        assertEquals("PPQ", extracted.get("divisionType"));
+
+        assertContains("Untitled", text);
+    }
+}

Added: tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/test/java/org/apache/tika/parser/image/BPGParserTest.java
URL: http://svn.apache.org/viewvc/tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/test/java/org/apache/tika/parser/image/BPGParserTest.java?rev=1722029&view=auto
==============================================================================
--- tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/test/java/org/apache/tika/parser/image/BPGParserTest.java (added)
+++ tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/test/java/org/apache/tika/parser/image/BPGParserTest.java Mon Dec 28 23:22:46 2015
@@ -0,0 +1,133 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.image;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import java.io.InputStream;
+import java.util.Arrays;
+import java.util.List;
+
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.Photoshop;
+import org.apache.tika.metadata.TikaCoreProperties;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.parser.Parser;
+import org.junit.Test;
+import org.xml.sax.helpers.DefaultHandler;
+
+public class BPGParserTest {
+    private final Parser parser = new BPGParser();
+
+    /**
+     * Parses a classpath resource with the BPG parser and returns the metadata
+     * it produced. The stream is closed here because {@code Parser.parse}
+     * consumes the stream but leaves closing it to the caller.
+     *
+     * @param resource classpath location of the BPG file to parse
+     * @return the metadata populated by the parser
+     */
+    private Metadata parseBPG(String resource) throws Exception {
+        Metadata metadata = new Metadata();
+        metadata.set(Metadata.CONTENT_TYPE, "image/x-bpg");
+        try (InputStream stream = getClass().getResourceAsStream(resource)) {
+            assertTrue("Missing test resource " + resource, stream != null);
+            parser.parse(stream, new DefaultHandler(), metadata, new ParseContext());
+        }
+        return metadata;
+    }
+
+    /** Tests a very basic file, without much metadata. */
+    @Test
+    public void testBPG() throws Exception {
+        Metadata metadata = parseBPG("/test-documents/testBPG.bpg");
+
+        assertEquals("100", metadata.get(Metadata.IMAGE_WIDTH));
+        assertEquals("75", metadata.get(Metadata.IMAGE_LENGTH));
+        assertEquals("10", metadata.get(Metadata.BITS_PER_SAMPLE));
+        assertEquals("YCbCr Colour", metadata.get(Photoshop.COLOR_MODE));
+    }
+
+    /** Tests a file with comments. */
+    @Test
+    public void testBPG_Commented() throws Exception {
+        Metadata metadata = parseBPG("/test-documents/testBPG_commented.bpg");
+
+        assertEquals("103", metadata.get(Metadata.IMAGE_WIDTH));
+        assertEquals("77", metadata.get(Metadata.IMAGE_LENGTH));
+        assertEquals("10", metadata.get(Metadata.BITS_PER_SAMPLE));
+        assertEquals("YCbCr Colour", metadata.get(Photoshop.COLOR_MODE));
+
+        // TODO Get the exif comment data to be properly extracted, see TIKA-1495
+        if (false) {
+            assertEquals("Tosteberga \u00C4ngar", metadata.get(TikaCoreProperties.TITLE));
+            assertEquals("Bird site in north eastern Sk\u00E5ne, Sweden.\n(new line)", metadata.get(TikaCoreProperties.DESCRIPTION));
+            List<String> keywords = Arrays.asList(metadata.getValues(Metadata.KEYWORDS));
+            assertTrue(keywords.contains("coast"));
+            assertTrue(keywords.contains("bird watching"));
+            assertEquals(keywords, Arrays.asList(metadata.getValues(TikaCoreProperties.KEYWORDS)));
+        }
+
+        // TODO Get the exif data to be properly extracted, see TIKA-1495
+        if (false) {
+            assertEquals("1.0E-6", metadata.get(Metadata.EXPOSURE_TIME)); // 1/1000000
+            assertEquals("2.8", metadata.get(Metadata.F_NUMBER));
+            assertEquals("4.6", metadata.get(Metadata.FOCAL_LENGTH));
+            assertEquals("114", metadata.get(Metadata.ISO_SPEED_RATINGS));
+            assertEquals(null, metadata.get(Metadata.EQUIPMENT_MAKE));
+            assertEquals(null, metadata.get(Metadata.EQUIPMENT_MODEL));
+            assertEquals(null, metadata.get(Metadata.SOFTWARE));
+            assertEquals("1", metadata.get(Metadata.ORIENTATION));
+            assertEquals("300.0", metadata.get(Metadata.RESOLUTION_HORIZONTAL));
+            assertEquals("300.0", metadata.get(Metadata.RESOLUTION_VERTICAL));
+            assertEquals("Inch", metadata.get(Metadata.RESOLUTION_UNIT));
+        }
+    }
+
+    /** Tests a file with geographic information in it. */
+    @Test
+    public void testBPG_Geo() throws Exception {
+        Metadata metadata = parseBPG("/test-documents/testBPG_GEO.bpg");
+
+        assertEquals("100", metadata.get(Metadata.IMAGE_WIDTH));
+        assertEquals("68", metadata.get(Metadata.IMAGE_LENGTH));
+        assertEquals("10", metadata.get(Metadata.BITS_PER_SAMPLE));
+        assertEquals("YCbCr Colour", metadata.get(Photoshop.COLOR_MODE));
+
+        // TODO Get the geographic data to be properly extracted, see TIKA-1495
+        if (false) {
+            assertEquals("12.54321", metadata.get(Metadata.LATITUDE));
+            assertEquals("-54.1234", metadata.get(Metadata.LONGITUDE));
+        }
+
+        // TODO Get the exif data to be properly extracted, see TIKA-1495
+        if (false) {
+            assertEquals("6.25E-4", metadata.get(Metadata.EXPOSURE_TIME)); // 1/1600
+            assertEquals("5.6", metadata.get(Metadata.F_NUMBER));
+            assertEquals("false", metadata.get(Metadata.FLASH_FIRED));
+            assertEquals("194.0", metadata.get(Metadata.FOCAL_LENGTH));
+            assertEquals("400", metadata.get(Metadata.ISO_SPEED_RATINGS));
+            assertEquals("Canon", metadata.get(Metadata.EQUIPMENT_MAKE));
+            assertEquals("Canon EOS 40D", metadata.get(Metadata.EQUIPMENT_MODEL));
+            assertEquals("Adobe Photoshop CS3 Macintosh", metadata.get(Metadata.SOFTWARE));
+            assertEquals("240.0", metadata.get(Metadata.RESOLUTION_HORIZONTAL));
+            assertEquals("240.0", metadata.get(Metadata.RESOLUTION_VERTICAL));
+            assertEquals("Inch", metadata.get(Metadata.RESOLUTION_UNIT));
+        }
+    }
+}

Added: tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/test/java/org/apache/tika/parser/image/ImageMetadataExtractorTest.java
URL: http://svn.apache.org/viewvc/tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/test/java/org/apache/tika/parser/image/ImageMetadataExtractorTest.java?rev=1722029&view=auto
==============================================================================
--- tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/test/java/org/apache/tika/parser/image/ImageMetadataExtractorTest.java (added)
+++ tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/test/java/org/apache/tika/parser/image/ImageMetadataExtractorTest.java Mon Dec 28 23:22:46 2015
@@ -0,0 +1,139 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.image;
+
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertTrue;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.verify;
+import static org.mockito.Mockito.when;
+
+import java.util.Arrays;
+import java.util.GregorianCalendar;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Locale;
+import java.util.TimeZone;
+
+import com.drew.metadata.Directory;
+import com.drew.metadata.MetadataException;
+import com.drew.metadata.Tag;
+import com.drew.metadata.exif.ExifIFD0Directory;
+import com.drew.metadata.exif.ExifSubIFDDirectory;
+import com.drew.metadata.jpeg.JpegCommentDirectory;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.TikaCoreProperties;
+import org.junit.Test;
+
+public class ImageMetadataExtractorTest {
+
+    /**
+     * Builds a calendar set to midnight on January 1st of the given year in the
+     * JVM default time zone.
+     */
+    private static GregorianCalendar januaryFirst(int year) {
+        GregorianCalendar calendar = new GregorianCalendar(TimeZone.getDefault(), Locale.ROOT);
+        calendar.setTimeInMillis(0); // zero the milliseconds, which set(...) below does not touch
+        calendar.set(year, 0, 1, 0, 0, 0);
+        return calendar;
+    }
+
+    /** A directory that the handler supports must be passed on to that handler. */
+    @SuppressWarnings({"rawtypes", "unchecked"})
+    @Test
+    public void testHandleDirectories() throws MetadataException {
+        Metadata metadata = mock(Metadata.class);
+        ImageMetadataExtractor.DirectoryHandler handler = mock(ImageMetadataExtractor.DirectoryHandler.class);
+        ImageMetadataExtractor extractor = new ImageMetadataExtractor(metadata, handler);
+
+        // Iterator that yields exactly one JPEG comment directory
+        Directory directory = new JpegCommentDirectory();
+        Iterator directories = mock(Iterator.class);
+        when(directories.hasNext()).thenReturn(true, false);
+        when(directories.next()).thenReturn(directory);
+        when(handler.supports(JpegCommentDirectory.class)).thenReturn(true);
+
+        extractor.handle(directories);
+        verify(handler).supports(JpegCommentDirectory.class);
+        verify(handler).handle(directory, metadata);
+    }
+
+    /** The EXIF handler supports the IFD0 and SubIFD directories, but not the base or JPEG comment ones. */
+    @Test
+    public void testExifHandlerSupports() {
+        ImageMetadataExtractor.ExifHandler handler = new ImageMetadataExtractor.ExifHandler();
+        assertTrue(handler.supports(ExifIFD0Directory.class));
+        assertTrue(handler.supports(ExifSubIFDDirectory.class));
+        assertFalse(handler.supports(Directory.class));
+        assertFalse(handler.supports(JpegCommentDirectory.class));
+    }
+
+    /** The "date/time original" tag is reported as the creation date. */
+    @Test
+    public void testExifHandlerParseDate() throws MetadataException {
+        ExifSubIFDDirectory exif = mock(ExifSubIFDDirectory.class);
+        when(exif.containsTag(ExifSubIFDDirectory.TAG_DATETIME_ORIGINAL)).thenReturn(true);
+        when(exif.getDate(ExifSubIFDDirectory.TAG_DATETIME_ORIGINAL)).thenReturn(
+                januaryFirst(2000).getTime()); // jvm default timezone as in Metadata Extractor
+        Metadata metadata = new Metadata();
+
+        new ImageMetadataExtractor.ExifHandler().handle(exif, metadata);
+        assertEquals("Should be ISO date without time zone", "2000-01-01T00:00:00",
+                metadata.get(TikaCoreProperties.CREATED));
+    }
+
+    /** The plain "date/time" tag is used when only that tag is present. */
+    @Test
+    public void testExifHandlerParseDateFallback() throws MetadataException {
+        ExifIFD0Directory exif = mock(ExifIFD0Directory.class);
+        when(exif.containsTag(ExifIFD0Directory.TAG_DATETIME)).thenReturn(true);
+        when(exif.getDate(ExifIFD0Directory.TAG_DATETIME)).thenReturn(
+                januaryFirst(1999).getTime()); // jvm default timezone as in Metadata Extractor
+        Metadata metadata = new Metadata();
+
+        new ImageMetadataExtractor.ExifHandler().handle(exif, metadata);
+        assertEquals("Should try EXIF Date/Time if Original is not set", "1999-01-01T00:00:00",
+                metadata.get(TikaCoreProperties.CREATED));
+    }
+
+    /** When the date tag is present but yields no date, no creation date is recorded. */
+    @Test
+    public void testExifHandlerParseDateError() throws MetadataException {
+        // NOTE(review): the mock is an ExifIFD0Directory while the stubbed tag is
+        // ExifSubIFDDirectory.TAG_DATETIME_ORIGINAL; confirm the mix is intended.
+        ExifIFD0Directory exif = mock(ExifIFD0Directory.class);
+        when(exif.containsTag(ExifSubIFDDirectory.TAG_DATETIME_ORIGINAL)).thenReturn(true);
+        when(exif.getDate(ExifSubIFDDirectory.TAG_DATETIME_ORIGINAL)).thenReturn(null);
+        Metadata metadata = new Metadata();
+
+        new ImageMetadataExtractor.ExifHandler().handle(exif, metadata);
+        assertNull("Parsing should proceed without date", metadata.get(TikaCoreProperties.CREATED));
+    }
+
+    /** Tags with no defined metadata field are copied as-is; tags named after defined fields are skipped. */
+    @Test
+    public void testCopyUnknownFieldsHandler() throws MetadataException {
+        Directory directory = mock(Directory.class);
+        Tag unknownTag = mock(Tag.class);
+        when(unknownTag.getTagName()).thenReturn("Image Description");
+        when(unknownTag.getDescription()).thenReturn("t1");
+        Tag keywordsTag = mock(Tag.class);
+        when(keywordsTag.getTagName()).thenReturn(Metadata.KEYWORDS);
+        when(keywordsTag.getDescription()).thenReturn("known");
+        Tag descriptionTag = mock(Tag.class);
+        when(descriptionTag.getTagName()).thenReturn(TikaCoreProperties.DESCRIPTION.getName());
+        when(descriptionTag.getDescription()).thenReturn("known");
+        List<Tag> tags = Arrays.asList(unknownTag, keywordsTag, descriptionTag);
+        when(directory.getTags()).thenReturn(tags);
+        Metadata metadata = new Metadata();
+        new ImageMetadataExtractor.CopyUnknownFieldsHandler().handle(directory, metadata);
+        assertEquals("t1", metadata.get("Image Description"));
+        assertNull("keywords should be excluded from bulk copy because it is a defined field",
+                metadata.get(Metadata.KEYWORDS));
+        assertNull(metadata.get(TikaCoreProperties.DESCRIPTION));
+    }
+}