You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ni...@apache.org on 2013/02/04 23:24:02 UTC

svn commit: r1442402 - in /tika/trunk: ./ tika-core/src/main/java/org/apache/tika/io/ tika-core/src/test/java/org/apache/tika/io/ tika-parsers/src/main/java/org/apache/tika/parser/mp3/ tika-parsers/src/test/java/org/apache/tika/parser/mp3/

Author: nick
Date: Mon Feb  4 22:24:01 2013
New Revision: 1442402

URL: http://svn.apache.org/viewvc?rev=1442402&view=rev
Log:
Apply patch from Oliver Heger from TIKA-991 - Re-work MP3 parser to capture audio duration by processing more of the audio frames

Added:
    tika/trunk/tika-core/src/main/java/org/apache/tika/io/TailStream.java
    tika/trunk/tika-core/src/test/java/org/apache/tika/io/TailStreamTest.java
    tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mp3/MpegStream.java
    tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/mp3/MpegStreamTest.java
Modified:
    tika/trunk/CHANGES.txt
    tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mp3/AudioFrame.java
    tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mp3/ID3v2Frame.java
    tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mp3/Mp3Parser.java
    tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/mp3/Mp3ParserTest.java

Modified: tika/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/tika/trunk/CHANGES.txt?rev=1442402&r1=1442401&r2=1442402&view=diff
==============================================================================
--- tika/trunk/CHANGES.txt (original)
+++ tika/trunk/CHANGES.txt Mon Feb  4 22:24:01 2013
@@ -6,6 +6,8 @@ Release 1.4 Current Development
 
   * RTF: Ordered and unordered lists are now extracted (TIKA-1062).
 
+  * MP3: Audio duration is now extracted (TIKA-991)
+
 Release 1.3 - 01/19/2013
 
   * Mimetype definitions added for more common programming languages,

Added: tika/trunk/tika-core/src/main/java/org/apache/tika/io/TailStream.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/io/TailStream.java?rev=1442402&view=auto
==============================================================================
--- tika/trunk/tika-core/src/main/java/org/apache/tika/io/TailStream.java (added)
+++ tika/trunk/tika-core/src/main/java/org/apache/tika/io/TailStream.java Mon Feb  4 22:24:01 2013
@@ -0,0 +1,268 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.io;
+
+import java.io.FilterInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+
+/**
+ * <p>
+ * A specialized input stream implementation which records the last portion read
+ * from an underlying stream.
+ * </p>
+ * <p>
+ * This stream implementation is useful to deal with information which is known
+ * to be located at the end of a stream (e.g. ID3 v1 tags). While reading bytes
+ * from the underlying stream, a given number of bytes is kept in an internal
+ * buffer. This buffer can then be queried after the whole stream was read. It
+ * contains the last bytes read from the original input stream.
+ * </p>
+ * 
+ * The underlying stream and the size of the tail buffer are specified when
+ * an instance is created; the recorded data is available via {@link #getTail()}.
+ */
+public class TailStream extends FilterInputStream
+{
+    /** Constant for the default skip buffer size. */
+    private static final int SKIP_SIZE = 4096;
+    
+    /** The buffer in which the tail data is stored. */
+    private final byte[] tailBuffer;
+
+    /** The size of the internal tail buffer. */
+    private final int tailSize;
+
+    /** A copy of the internal tail buffer used for mark() operations. */
+    private byte[] markBuffer;
+
+    /** The number of bytes that have been read so far. */
+    private long bytesRead;
+
+    /** The number of bytes read at the last mark() operation. */
+    private long markBytesRead;
+
+    /** The current index into the tail buffer. */
+    private int currentIndex;
+
+    /** A copy of the current index used for mark() operations. */
+    private int markIndex;
+
+    /**
+     * Creates a new {@code TailStream} which wraps the given stream.
+     *
+     * @param in the stream whose trailing bytes are to be recorded
+     * @param size the capacity of the tail buffer in bytes
+     */
+    public TailStream(InputStream in, int size)
+    {
+        super(in);
+        tailBuffer = new byte[size];
+        tailSize = size;
+    }
+
+    /**
+     * {@inheritDoc} A byte that was read is also recorded in the tail buffer.
+     */
+    @Override
+    public int read() throws IOException
+    {
+        final int value = super.read();
+        if (value == -1)
+        {
+            return value;
+        }
+        appendByte((byte) value);
+        return value;
+    }
+
+    /**
+     * {@inheritDoc} This implementation delegates to
+     * {@link #read(byte[], int, int)}, which adds the data that was read to
+     * the internal tail buffer.
+     */
+    @Override
+    public int read(byte[] buf) throws IOException
+    {
+        // Do not go through super.read(buf) and append here as well:
+        // FilterInputStream.read(byte[]) is specified to call
+        // read(buf, 0, buf.length) on this object, i.e. the override which
+        // already records the data. Appending again would store every chunk
+        // twice and count its bytes twice.
+        return read(buf, 0, buf.length);
+    }
+
+    /**
+     * {@inheritDoc} The bytes actually read from the underlying stream are
+     * also added to the internal tail buffer.
+     */
+    @Override
+    public int read(byte[] buf, int ofs, int length) throws IOException
+    {
+        final int count = super.read(buf, ofs, length);
+        if (count <= 0)
+        {
+            return count;
+        }
+        appendBuf(buf, ofs, count);
+        return count;
+    }
+    
+    /**
+     * {@inheritDoc} This implementation reads (and discards) the data so that
+     * skipped bytes reach the tail buffer, too. Nothing is skipped if n <= 0.
+     */
+    @Override
+    public long skip(long n) throws IOException
+    {
+        if (n <= 0)
+        {
+            return 0; // InputStream contract; also avoids a negative array size
+        }
+        byte[] buf = new byte[(int) Math.min(n, SKIP_SIZE)];
+        long bytesSkipped = 0;
+        int count = 0;
+        while (bytesSkipped < n && count != -1)
+        {
+            count = read(buf, 0, (int) Math.min(buf.length, n - bytesSkipped));
+            if (count != -1)
+            {
+                bytesSkipped += count;
+            }
+        }
+        return bytesSkipped;
+    }
+
+    /**
+     * {@inheritDoc} This implementation takes a snapshot of its own state
+     * (content of the tail buffer, current index and number of bytes read)
+     * so that it can be restored when {@code reset()} is called later. Note
+     * that {@code mark()} of the underlying stream is not invoked here.
+     */
+    @Override
+    public void mark(int limit)
+    {
+        markBytesRead = bytesRead;
+        markIndex = currentIndex;
+        markBuffer = tailBuffer.clone();
+    }
+
+    /**
+     * {@inheritDoc} Only the state saved by the last {@code mark()} call is
+     * restored here; if {@code mark()} was never called, nothing happens.
+     */
+    @Override
+    public void reset()
+    {
+        if (markBuffer == null)
+        {
+            return;
+        }
+        bytesRead = markBytesRead;
+        currentIndex = markIndex;
+        System.arraycopy(markBuffer, 0, tailBuffer, 0, tailSize);
+    }
+
+    /**
+     * Returns a copy of the most recent data read from the underlying stream,
+     * in the order in which it was read. If at least {@code tailSize} bytes
+     * (the size passed to the constructor) have been read, the returned array
+     * has a length of {@code tailSize}. Otherwise, its length equals the
+     * number of bytes read so far.
+     *
+     * @return a new array with the last data read from the underlying stream
+     */
+    public byte[] getTail()
+    {
+        final int size = (int) Math.min(tailSize, bytesRead);
+        final int older = size - currentIndex;
+        final byte[] tail = new byte[size];
+        System.arraycopy(tailBuffer, currentIndex, tail, 0, older);
+        System.arraycopy(tailBuffer, 0, tail, older, currentIndex);
+        return tail;
+    }
+
+    /**
+     * Adds the given byte to the internal tail buffer. With a tail size of 0
+     * there is nothing to store, so the byte is only counted.
+     * @param b the byte to be added
+     */
+    private void appendByte(byte b)
+    {
+        if (tailSize > 0)
+        {
+            tailBuffer[currentIndex] = b;
+            currentIndex = (currentIndex + 1) % tailSize;
+        }
+        bytesRead++;
+    }
+
+    /**
+     * Records a chunk of data that was read in the internal tail buffer and
+     * updates the number of bytes read.
+     * @param buf the buffer containing the data
+     * @param ofs the offset of the first byte in the buffer
+     * @param length the number of bytes to be recorded
+     */
+    private void appendBuf(byte[] buf, int ofs, int length)
+    {
+        if (length < tailSize)
+        {
+            copyToTailBuffer(buf, ofs, length);
+        }
+        else
+        {
+            replaceTailBuffer(buf, ofs, length);
+        }
+
+        bytesRead += length;
+    }
+
+    /**
+     * Replaces the content of the internal tail buffer by the last portion of
+     * the given buffer. This method is called if a buffer was read from the
+     * underlying stream whose length is larger than the tail buffer.
+     * 
+     * @param buf the buffer
+     * @param ofs the start offset in the buffer
+     * @param length the number of bytes to be copied
+     */
+    private void replaceTailBuffer(byte[] buf, int ofs, int length)
+    {
+        System.arraycopy(buf, ofs + length - tailSize, tailBuffer, 0, tailSize);
+        currentIndex = 0;
+    }
+
+    /**
+     * Copies the given buffer into the internal tail buffer at the current
+     * position. This method is called if a buffer is read from the underlying
+     * stream whose length is smaller than the tail buffer. In this case the
+     * tail buffer is only partly overwritten.
+     * 
+     * @param buf the buffer
+     * @param ofs the start offset in the buffer
+     * @param length the number of bytes to be copied
+     */
+    private void copyToTailBuffer(byte[] buf, int ofs, int length)
+    {
+        int remaining = tailSize - currentIndex;
+        int size1 = Math.min(remaining, length);
+        System.arraycopy(buf, ofs, tailBuffer, currentIndex, size1);
+        System.arraycopy(buf, ofs + size1, tailBuffer, 0, length - size1);
+        currentIndex = (currentIndex + length) % tailSize;
+    }
+}

Added: tika/trunk/tika-core/src/test/java/org/apache/tika/io/TailStreamTest.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/test/java/org/apache/tika/io/TailStreamTest.java?rev=1442402&view=auto
==============================================================================
--- tika/trunk/tika-core/src/test/java/org/apache/tika/io/TailStreamTest.java (added)
+++ tika/trunk/tika-core/src/test/java/org/apache/tika/io/TailStreamTest.java Mon Feb  4 22:24:01 2013
@@ -0,0 +1,201 @@
+package org.apache.tika.io;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Arrays;
+
+import org.junit.Test;
+
+/**
+ * Test class for {@code TailStream}.
+ */
+public class TailStreamTest
+{
+    /** Constant for generating test text. */
+    private static final String TEXT =
+            "Lorem ipsum dolor sit amet, consetetur "
+                    + "sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut "
+                    + "labore et dolore magna aliquyam erat, sed diam voluptua. At vero"
+                    + " eos et accusam et justo duo dolores et ea rebum. Stet clita "
+                    + "kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor "
+                    + "sit amet.";
+
+    /**
+     * Generates a test text using the specified parameters.
+     * 
+     * @param from the start index of the text
+     * @param length the length of the text
+     * @return the generated test text
+     */
+    private static String generateText(int from, int length)
+    {
+        int count = from + length;
+        StringBuilder buf = new StringBuilder(count);
+        while (buf.length() < count)
+        {
+            buf.append(TEXT);
+        }
+        return buf.substring(from, from + length);
+    }
+
+    /**
+     * Wraps a slice of the test text in an in-memory stream.
+     * @param from the start index of the text
+     * @param length the length of the generated stream
+     * @return the stream with the test text
+     */
+    private static InputStream generateStream(int from, int length)
+    {
+        byte[] content = generateText(from, length).getBytes();
+        return new ByteArrayInputStream(content);
+    }
+
+    /**
+     * Drains the given stream byte by byte using the single-byte
+     * {@code read()} method.
+     *
+     * @param in the stream to be read
+     * @return an array with the content of the stream
+     * @throws IOException if an error occurs
+     */
+    private static byte[] readStream(InputStream in) throws IOException
+    {
+        ByteArrayOutputStream content = new ByteArrayOutputStream();
+        for (int b = in.read(); b != -1; b = in.read())
+        {
+            content.write(b);
+        }
+        return content.toByteArray();
+    }
+
+    /**
+     * Tests whether the tail buffer can be obtained before data was read.
+     */
+    @Test
+    public void testTailBeforeRead() throws IOException
+    {
+        TailStream stream = new TailStream(generateStream(0, 100), 50);
+        assertEquals("Wrong buffer length", 0, stream.getTail().length);
+        stream.close();
+    }
+
+    /** Tests the tail content if the stream is shorter than the tail buffer. */
+    @Test
+    public void testTailBufferPartlyRead() throws IOException
+    {
+        final int length = 64;
+        InputStream source = generateStream(0, length);
+        TailStream stream = new TailStream(source, 2 * length);
+        byte[] content = readStream(stream);
+        byte[] tail = stream.getTail();
+        assertTrue("Wrong content", Arrays.equals(content, tail));
+        stream.close();
+    }
+
+    /** Tests the content of the tail buffer if only single bytes were read. */
+    @Test
+    public void testTailSingleByteReads() throws IOException
+    {
+        final int count = 128;
+        TailStream stream = new TailStream(generateStream(0, 2 * count), count);
+        readStream(stream);
+        String tail = new String(stream.getTail());
+        assertEquals("Wrong buffer", generateText(count, count), tail);
+        // release the stream, as all other tests of this class do
+        stream.close();
+    }
+
+    /** Tests the tail content if the stream is read in larger chunks. */
+    @Test
+    public void testTailChunkReads() throws IOException
+    {
+        final int length = 16384;
+        final int tailSize = 61;
+        final int chunkSize = 100;
+        TailStream stream = new TailStream(generateStream(0, length), tailSize);
+        byte[] chunk = new byte[chunkSize];
+        int count = stream.read(chunk, 10, 8);
+        assertEquals("Wrong number of bytes read", 8, count);
+        do
+        {
+            count = stream.read(chunk);
+        }
+        while (count != -1);
+
+        String expected = generateText(length - tailSize, tailSize);
+        assertEquals("Wrong buffer", expected, new String(stream.getTail()));
+        stream.close();
+    }
+
+    /** Tests whether mark() and reset() work as expected. */
+    @Test
+    public void testReadWithMarkAndReset() throws IOException
+    {
+        final int tailSize = 64;
+        TailStream stream =
+                new TailStream(generateStream(0, 2 * tailSize), tailSize);
+        byte[] buf = new byte[tailSize / 2];
+        stream.read(buf);
+        stream.mark(tailSize);
+        stream.read(buf);
+        stream.reset();
+        readStream(stream);
+        assertEquals("Wrong buffer", generateText(tailSize, tailSize),
+                new String(stream.getTail()));
+        // release the stream, as all other tests of this class do
+        stream.close();
+    }
+
+    /** Tests whether a reset() operation without a mark is simply ignored. */
+    @Test
+    public void testResetWithoutMark() throws IOException
+    {
+        final int tailSize = 75;
+        final int length = 128;
+        InputStream source = generateStream(0, length);
+        TailStream stream = new TailStream(source, tailSize);
+        stream.reset();
+        byte[] content = new byte[length];
+        stream.read(content);
+        String expected = generateText(length - tailSize, tailSize);
+        String tail = new String(stream.getTail());
+        assertEquals("Wrong buffer", expected, tail);
+        stream.close();
+    }
+
+    /**
+     * Tests whether skip() also fills the tail buffer.
+     */
+    @Test
+    public void testSkip() throws IOException
+    {
+        final int tailSize = 128;
+        final int count = 1024;
+        final int skipCount = 512;
+        TailStream stream = new TailStream(generateStream(0, count), tailSize);
+        assertEquals("Wrong skip result", skipCount, stream.skip(skipCount));
+        assertEquals("Wrong buffer",
+                generateText(skipCount - tailSize, tailSize),
+                new String(stream.getTail()));
+        stream.close();
+    }
+
+    /**
+     * Tests a skip operation at the end of the stream.
+     */
+    @Test
+    public void testSkipEOS() throws IOException
+    {
+        final int count = 128;
+        TailStream stream = new TailStream(generateStream(0, count), 2 * count);
+        assertEquals("Wrong skip result", count, stream.skip(2 * count));
+        assertEquals("Wrong buffer", generateText(0, count),
+                new String(stream.getTail()));
+        stream.close();
+    }
+}

Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mp3/AudioFrame.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mp3/AudioFrame.java?rev=1442402&r1=1442401&r2=1442402&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mp3/AudioFrame.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mp3/AudioFrame.java Mon Feb  4 22:24:01 2013
@@ -28,9 +28,32 @@ import org.xml.sax.SAXException;
  * Currently, only the header is processed, not the raw audio data.
  */
 public class AudioFrame implements MP3Frame {
-    private String version;
-    private int sampleRate;
-    private int channels;
+    /** Constant for the MPEG version 1. */
+    public static final int MPEG_V1 = 3;
+
+    /** Constant for the MPEG version 2. */
+    public static final int MPEG_V2 = 2;
+
+    /** Constant for the MPEG version 2.5. */
+    public static final int MPEG_V2_5 = 0;
+
+    /** Constant for audio layer 1. */
+    public static final int LAYER_1 = 3;
+    
+    /** Constant for audio layer 2. */
+    public static final int LAYER_2 = 2;
+    
+    /** Constant for audio layer 3. */
+    public static final int LAYER_3 = 1;
+    
+    private final String version;
+    private final int versionCode;
+    private final int layer;
+    private final int sampleRate;
+    private final int channels;
+    private final int bitRate;
+    private final int length;
+    private final float duration;
 
     public String getVersion() {
         return version;
@@ -51,6 +74,51 @@ public class AudioFrame implements MP3Fr
     }
 
     /**
+     * Get the version code.
+     * @return the version code (one of the {@code MPEG} constants)
+     */
+    public int getVersionCode()
+    {
+        return versionCode;
+    }
+
+    /**
+     * Get the audio layer code.
+     * @return the audio layer (one of the {@code LAYER} constants)
+     */
+    public int getLayer()
+    {
+        return layer;
+    }
+
+    /**
+     * Get the bit rate in bits per second.
+     * @return the bit rate
+     */
+    public int getBitRate()
+    {
+        return bitRate;
+    }
+
+    /**
+     * Returns the frame length in bytes.
+     * @return the frame length
+     */
+    public int getLength()
+    {
+        return length;
+    }
+
+    /**
+     * Returns the duration in milliseconds.
+     * @return the duration
+     */
+    public float getDuration()
+    {
+        return duration;
+    }
+
+    /**
      * Does this appear to be a 4 byte audio frame header?
      */
     public static boolean isAudioHeader(int h1, int h2, int h3, int h4) {
@@ -65,12 +133,19 @@ public class AudioFrame implements MP3Fr
         return false;
     }
 
-
+    /**
+     * @deprecated Use the constructor which is passed all values directly.
+     */
+    @Deprecated
     public AudioFrame(InputStream stream, ContentHandler handler)
             throws IOException, SAXException, TikaException {
         this(-2, -2, -2, -2, stream);
     }
 
+    /**
+     * @deprecated Use the constructor which is passed all values directly.
+     */
+    @Deprecated
     public AudioFrame(int h1, int h2, int h3, int h4, InputStream in)
             throws IOException {
         if (h1 == -2 && h2 == -2 && h3 == -2 && h4 == -2) {
@@ -81,46 +156,28 @@ public class AudioFrame implements MP3Fr
         }
 
         if (isAudioHeader(h1, h2, h3, h4)) {
-            version = "MPEG 3 Layer ";
-            int layer = (h2 >> 1) & 0x03;
-            if (layer == 1) {
-                version += "III";
-            } else if (layer == 2) {
-                version += "II";
-            } else if (layer == 3) {
-                version += "I";
-            } else {
-                version += "(reserved)";
-            }
-
-            version += " Version ";
-            int ver = (h2 >> 3) & 0x03;
-            if (ver == 0) {
-                version += "2.5";
-            } else if(ver == 2) {
-                version += "2";
-            } else if(ver == 3) {
-                version += "1";
-            } else {
-                version += "(reseved)";
-            }
-
-            int rate = (h3 >> 2) & 0x03;
-            switch (rate) {
+            layer = (h2 >> 1) & 0x03;
+            versionCode = (h2 >> 3) & 0x03;
+            version = generateVersionStr(versionCode, layer);
+
+            int rateCode = (h3 >> 2) & 0x03;
+            int rate;
+            switch (rateCode) {
             case 0:
-                sampleRate = 11025;
+                rate = 11025;
                 break;
             case 1:
-                sampleRate = 12000;
+                rate = 12000;
                 break;
             default:
-                sampleRate = 8000;
+                rate = 8000;
             }
-            if (ver == 2) {
-                sampleRate *= 2;
-            } else if(ver == 3) {
-                sampleRate *= 4;
+            if (versionCode == MPEG_V2) {
+                rate *= 2;
+            } else if(versionCode == MPEG_V1) {
+                rate *= 4;
             }
+            sampleRate = rate;
 
             int chans = h4 & 0x192;
             if (chans < 3) {
@@ -129,9 +186,67 @@ public class AudioFrame implements MP3Fr
             } else {
                 channels = 1;
             }
+            bitRate = 0;
+            duration = 0;
+            length = 0;
         } else {
             throw new IllegalArgumentException("Magic Audio Frame Header not found");
         }
     }
+    
+    /**
+     * Creates a new instance of {@code AudioFrame} from values which have
+     * already been determined by the caller.
+     * @param mpegVersion the code for the MPEG version
+     * @param layer the code for the layer
+     * @param bitRate the bit rate (in bps)
+     * @param sampleRate the sample rate (in samples per second)
+     * @param channels the number of channels
+     * @param length the frame length (in bytes)
+     * @param duration the duration of this frame (in milliseconds)
+     */
+    public AudioFrame(int mpegVersion, int layer, int bitRate, int sampleRate,
+            int channels, int length, float duration) {
+        this.versionCode = mpegVersion;
+        this.layer = layer;
+        this.version = generateVersionStr(mpegVersion, layer);
+        this.bitRate = bitRate;
+        this.sampleRate = sampleRate;
+        this.channels = channels;
+        this.length = length;
+        this.duration = duration;
+    }
 
+    /**
+     * Generates a string for the version of this audio frame.
+     * @param version the code for the MPEG version (one of the {@code MPEG} constants)
+     * @param layer the code for the layer (one of the {@code LAYER} constants)
+     * @return a string for the version; any other code is shown as "(reserved)"
+     */
+    private static String generateVersionStr(int version, int layer) {
+        StringBuilder buf = new StringBuilder(64);
+        buf.append("MPEG 3 Layer ");
+        if (layer == LAYER_3) {
+            buf.append("III");
+        } else if (layer == LAYER_2) {
+            buf.append("II");
+        } else if (layer == LAYER_1) {
+            buf.append("I");
+        } else {
+            buf.append("(reserved)");
+        }
+
+        buf.append(" Version ");
+        if (version == MPEG_V2_5) {
+            buf.append("2.5");
+        } else if(version == MPEG_V2) {
+            buf.append("2");
+        } else if(version == MPEG_V1) {
+            buf.append("1");
+        } else {
+            buf.append("(reserved)");
+        }
+
+        return buf.toString();
+    }
 }

Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mp3/ID3v2Frame.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mp3/ID3v2Frame.java?rev=1442402&r1=1442401&r2=1442402&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mp3/ID3v2Frame.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mp3/ID3v2Frame.java Mon Feb  4 22:24:01 2013
@@ -18,6 +18,7 @@ package org.apache.tika.parser.mp3;
 
 import java.io.IOException;
 import java.io.InputStream;
+import java.io.PushbackInputStream;
 import java.io.UnsupportedEncodingException;
 import java.util.Iterator;
 
@@ -61,10 +62,12 @@ public class ID3v2Frame implements MP3Fr
     }
 
     /**
-     * Returns the next Frame (ID3v2 or Audio) in
+     * Returns the next ID3v2 Frame in
      *  the file, or null if the next batch of data
-     *  doesn't correspond to either an ID3v2 Frame
-     *  or an Audio Frame.
+     *  doesn't correspond to an ID3v2 header.
+     * If no ID3v2 frame could be detected and the passed in input stream is a
+     * {@code PushbackInputStream}, the bytes read so far are pushed back so
+     * that they can be read again.
      * ID3v2 Frames should come before all Audio ones.
      */
     public static MP3Frame createFrameIfPresent(InputStream inp)
@@ -78,21 +81,39 @@ public class ID3v2Frame implements MP3Fr
             int majorVersion = inp.read();
             int minorVersion = inp.read();
             if (majorVersion == -1 || minorVersion == -1) {
+                pushBack(inp, h1, h2, h3, majorVersion, minorVersion);
                 return null;
             }
             return new ID3v2Frame(majorVersion, minorVersion, inp);
         }
-        
-        // Is it an Audio Frame?
-        int h4 = inp.read();
-        if (AudioFrame.isAudioHeader(h1, h2, h3, h4)) {
-            return new AudioFrame(h1, h2, h3, h4, inp);
-        }
-        
+
         // Not a frame header
+        pushBack(inp, h1, h2, h3);
         return null;
     }
 
+    /**
+     * Pushes back the bytes read while probing for an ID3v2 header, if the
+     * stream supports it. Stops at the first -1 (end of stream, not data).
+     * @param inp the input stream
+     * @param bytes the values read so far; -1 marks the end of the stream
+     * @throws IOException if an error occurs
+     */
+    private static void pushBack(InputStream inp, int... bytes)
+            throws IOException
+    {
+        if (inp instanceof PushbackInputStream)
+        {
+            byte[] buf = new byte[bytes.length];
+            int count = 0;
+            for (; count < bytes.length && bytes[count] != -1; count++)
+            {
+                buf[count] = (byte) bytes[count];
+            }
+            ((PushbackInputStream) inp).unread(buf, 0, count);
+        }
+    }
+
     private ID3v2Frame(int majorVersion, int minorVersion, InputStream inp)
             throws IOException {
         this.majorVersion = majorVersion;

Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mp3/Mp3Parser.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mp3/Mp3Parser.java?rev=1442402&r1=1442401&r2=1442402&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mp3/Mp3Parser.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mp3/Mp3Parser.java Mon Feb  4 22:24:01 2013
@@ -24,6 +24,7 @@ import java.util.List;
 import java.util.Set;
 
 import org.apache.tika.exception.TikaException;
+import org.apache.tika.io.TailStream;
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.metadata.TikaCoreProperties;
 import org.apache.tika.metadata.XMPDM;
@@ -79,7 +80,8 @@ public class Mp3Parser extends AbstractP
            metadata.set(XMPDM.ALBUM, tag.getAlbum());
            metadata.set(XMPDM.RELEASE_DATE, tag.getYear());
            metadata.set(XMPDM.GENRE, tag.getGenre());
-           
+           metadata.set(XMPDM.DURATION, audioAndTags.duration);
+
            List<String> comments = new ArrayList<String>();
            for (ID3Comment comment : tag.getComments()) {
               StringBuffer cmt = new StringBuffer();
@@ -113,6 +115,7 @@ public class Mp3Parser extends AbstractP
             }
             xhtml.element("p", tag.getYear());
             xhtml.element("p", tag.getGenre());
+            xhtml.element("p", String.valueOf(audioAndTags.duration));
             for (String comment : comments) {
                xhtml.element("p", comment);
             }
@@ -157,11 +160,14 @@ public class Mp3Parser extends AbstractP
        LyricsHandler lyrics = null;
        AudioFrame firstAudio = null;
 
+       TailStream tailStream = new TailStream(stream, 10240+128);
+       MpegStream mpegStream = new MpegStream(tailStream);
+
        // ID3v2 tags live at the start of the file
        // You can apparently have several different ID3 tag blocks
        // So, keep going until we don't find any more
        MP3Frame f;
-       while ((f = ID3v2Frame.createFrameIfPresent(stream)) != null && firstAudio == null) {
+       while ((f = ID3v2Frame.createFrameIfPresent(mpegStream)) != null) {
            if(f instanceof ID3v2Frame) {
                ID3v2Frame id3F = (ID3v2Frame)f;
                if (id3F.getMajorVersion() == 4) {
@@ -171,15 +177,27 @@ public class Mp3Parser extends AbstractP
                } else if(id3F.getMajorVersion() == 2) {
                    v22 = new ID3v22Handler(id3F);
                }
-           } else if(f instanceof AudioFrame) {
-               firstAudio = (AudioFrame)f;
            }
        }
 
+        // Now iterate over all audio frames in the file
+        AudioFrame frame = mpegStream.nextFrame();
+        float duration = 0;
+        while (frame != null)
+        {
+            duration += frame.getDuration();
+            if (firstAudio == null)
+            {
+                firstAudio = frame;
+            }
+            mpegStream.skipFrame();
+            frame = mpegStream.nextFrame();
+        }
+
        // ID3v1 tags live at the end of the file
        // Lyrics live just before ID3v1, at the end of the file
        // Search for both (handlers seek to the end for us)
-       lyrics = new LyricsHandler(stream, handler);
+       lyrics = new LyricsHandler(tailStream.getTail());
        v1 = lyrics.id3v1;
 
        // Go in order of preference
@@ -203,6 +221,7 @@ public class Mp3Parser extends AbstractP
        ret.audio = firstAudio;
        ret.lyrics = lyrics;
        ret.tags = tags.toArray(new ID3Tags[tags.size()]);
+       ret.duration = duration;
        return ret;
     }
 
@@ -210,6 +229,7 @@ public class Mp3Parser extends AbstractP
         private ID3Tags[] tags;
         private AudioFrame audio;
         private LyricsHandler lyrics;
+        private float duration;
     }
 
 }

Added: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mp3/MpegStream.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mp3/MpegStream.java?rev=1442402&view=auto
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mp3/MpegStream.java (added)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mp3/MpegStream.java Mon Feb  4 22:24:01 2013
@@ -0,0 +1,469 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.mp3;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.PushbackInputStream;
+
+/**
+ * <p>
+ * A specialized stream class which can be used to extract single frames of MPEG
+ * audio files.
+ * </p>
+ * <p>
+ * Instances of this class are constructed with an underlying stream which
+ * should point to an audio file. Read operations are possible in the usual way.
+ * However, there are special methods for searching and extracting headers of
+ * MPEG frames. Some meta information of frames can be queried.
+ * </p>
+ */
+class MpegStream extends PushbackInputStream
+{
+    /** Bit rate table for MPEG V1, layer 1. */
+    private static final int[] BIT_RATE_MPEG1_L1 = {
+            0, 32000, 64000, 96000, 128000, 160000, 192000, 224000, 256000,
+            288000, 320000, 352000, 384000, 416000, 448000
+    };
+
+    /** Bit rate table for MPEG V1, layer 2. */
+    private static final int[] BIT_RATE_MPEG1_L2 = {
+            0, 32000, 48000, 56000, 64000, 80000, 96000, 112000, 128000,
+            160000, 192000, 224000, 256000, 320000, 384000
+    };
+
+    /** Bit rate table for MPEG V1, layer 3. */
+    private static final int[] BIT_RATE_MPEG1_L3 = {
+            0, 32000, 40000, 48000, 56000, 64000, 80000, 96000, 112000, 128000,
+            160000, 192000, 224000, 256000, 320000
+    };
+
+    /** Bit rate table for MPEG V2/V2.5, layer 1. */
+    private static final int[] BIT_RATE_MPEG2_L1 = {
+            0, 32000, 48000, 56000, 64000, 80000, 96000, 112000, 128000,
+            144000, 160000, 176000, 192000, 224000, 256000
+    };
+
+    /** Bit rate table for MPEG V2/V2.5, layer 2 and 3. */
+    private static final int[] BIT_RATE_MPEG2_L2 = {
+            0, 8000, 16000, 24000, 32000, 40000, 48000, 56000, 64000, 80000,
+            96000, 112000, 128000, 144000, 160000
+    };
+
+    /** Sample rate table for MPEG V1. */
+    private static final int[] SAMPLE_RATE_MPEG1 = {
+            44100, 48000, 32000
+    };
+
+    /** Sample rate table for MPEG V2. */
+    private static final int[] SAMPLE_RATE_MPEG2 = {
+            22050, 24000, 16000
+    };
+
+    /** Sample rate table for MPEG V2.5. */
+    private static final int[] SAMPLE_RATE_MPEG2_5 = {
+            11025, 12000, 8000
+    };
+
+    /** Sample rate table for all MPEG versions. */
+    private static final int[][] SAMPLE_RATE = createSampleRateTable();
+
+    /** Constant for the number of samples for a layer 1 frame. */
+    private static final int SAMPLE_COUNT_L1 = 384;
+
+    /** Constant for the number of samples for a layer 2 or 3 frame. */
+    private static final int SAMPLE_COUNT_L2 = 1152;
+
+    /** Constant for the size of an MPEG frame header in bytes. */
+    private static final int HEADER_SIZE = 4;
+
+    /** The current MPEG header. */
+    private AudioFrame currentHeader;
+
+    /** A flag indicating whether the end of the stream has been reached. */
+    private boolean endOfStream;
+
+    /**
+     * Creates a new instance of {@code MpegStream} and initializes it with the
+     * underlying stream.
+     * 
+     * @param in the underlying audio stream
+     */
+    public MpegStream(InputStream in)
+    {
+        super(in, 2 * HEADER_SIZE);
+    }
+
+    /**
+     * Searches for the next MPEG frame header from the current stream position
+     * on. This method advances the underlying input stream until it finds a
+     * valid frame header or the end of the stream is reached. In the former
+     * case a corresponding {@code AudioFrame} object is created. In the latter
+     * case there are no more headers, so the end of the stream is probably
+     * reached.
+     * 
+     * @return the next {@code AudioFrame} or <b>null</b>
+     * @throws IOException if an IO error occurs
+     */
+    public AudioFrame nextFrame() throws IOException
+    {
+        AudioFrame frame = null;
+        while (!endOfStream && frame == null)
+        {
+            findFrameSyncByte();
+            if (!endOfStream)
+            {
+                HeaderBitField headerField = createHeaderField();
+                if (!endOfStream)
+                {
+                    frame = createHeader(headerField);
+                    if (frame == null)
+                    {
+                        pushBack(headerField);
+                    }
+                }
+            }
+        }
+
+        currentHeader = frame;
+        return frame;
+    }
+
+    /**
+     * Skips the current MPEG frame. This method can be called after a valid
+     * MPEG header has been retrieved using {@code nextFrame()}. In this case
+     * the underlying stream is advanced to the end of the associated MPEG
+     * frame. Otherwise, this method has no effect. The return value indicates
+     * whether a frame could be skipped.
+     * 
+     * @return <b>true</b> if a frame could be skipped, <b>false</b> otherwise
+     * @throws IOException if an IO error occurs
+     */
+    public boolean skipFrame() throws IOException
+    {
+        if (currentHeader != null)
+        {
+            skipStream(in, currentHeader.getLength() - HEADER_SIZE);
+            currentHeader = null;
+            return true;
+        }
+        return false;
+    }
+
+    /**
+     * Advances the underlying stream until the first byte of frame sync is
+     * found.
+     * 
+     * @throws IOException if an error occurs
+     */
+    private void findFrameSyncByte() throws IOException
+    {
+        boolean found = false;
+        while (!found && !endOfStream)
+        {
+            if (nextByte() == 0xFF)
+            {
+                found = true;
+            }
+        }
+    }
+
+    /**
+     * Creates a bit field for the MPEG frame header.
+     * 
+     * @return the bit field
+     * @throws IOException if an error occurs
+     */
+    private HeaderBitField createHeaderField() throws IOException
+    {
+        HeaderBitField field = new HeaderBitField();
+        field.add(nextByte());
+        field.add(nextByte());
+        field.add(nextByte());
+        return field;
+    }
+
+    /**
+     * Creates an {@code AudioFrame} object based on the given header field. If
+     * the header field contains invalid values, result is <b>null</b>.
+     * 
+     * @param bits the header bit field
+     * @return the {@code AudioFrame}
+     */
+    private AudioFrame createHeader(HeaderBitField bits)
+    {
+        if (bits.get(21, 23) != 7)
+        {
+            return null;
+        }
+
+        int mpegVer = bits.get(19, 20);
+        int layer = bits.get(17, 18);
+        int bitRateCode = bits.get(12, 15);
+        int sampleRateCode = bits.get(10, 11);
+        int padding = bits.get(9);
+
+        if (mpegVer == 1 || layer == 0 || bitRateCode == 0 || bitRateCode == 15
+                || sampleRateCode == 3)
+        {
+            // invalid header values
+            return null;
+        }
+
+        int bitRate = calculateBitRate(mpegVer, layer, bitRateCode);
+        int sampleRate = calculateSampleRate(mpegVer, sampleRateCode);
+        int length = calculateFrameLength(layer, bitRate, sampleRate, padding);
+        float duration = calculateDuration(layer, sampleRate);
+        int channels = calculateChannels(bits.get(6, 7));
+        return new AudioFrame(mpegVer, layer, bitRate, sampleRate, channels,
+                length, duration);
+    }
+
+    /**
+     * Reads the next byte.
+     * 
+     * @return the next byte, or 0 if the end of the stream has been reached
+     * @throws IOException if an error occurs
+     */
+    private int nextByte() throws IOException
+    {
+        int result = 0;
+        if (!endOfStream)
+        {
+            result = read();
+            if (result == -1)
+            {
+                endOfStream = true;
+            }
+        }
+        return endOfStream ? 0 : result;
+    }
+
+    /**
+     * Pushes the given header field back in the stream so that the bytes are
+     * read again. This method is called if an invalid header was detected. Then
+     * search has to continue at the next byte after the frame sync byte.
+     * 
+     * @param field the header bit field with the invalid frame header
+     * @throws IOException if an error occurs
+     */
+    private void pushBack(HeaderBitField field) throws IOException
+    {
+        unread(field.toArray());
+    }
+
+    /**
+     * Skips the given number of bytes from the specified input stream.
+     * 
+     * @param in the input stream
+     * @param count the number of bytes to skip
+     * @throws IOException if an IO error occurs
+     */
+    private static void skipStream(InputStream in, long count)
+            throws IOException
+    {
+        long size = count;
+        long skipped = 0;
+        while (size > 0 && skipped >= 0)
+        {
+            skipped = in.skip(size);
+            if (skipped != -1)
+            {
+                size -= skipped;
+            }
+        }
+    }
+    
+    /**
+     * Calculates the bit rate based on the given parameters.
+     * 
+     * @param mpegVer the MPEG version
+     * @param layer the layer
+     * @param code the code for the bit rate
+     * @return the bit rate in bits per second
+     */
+    private static int calculateBitRate(int mpegVer, int layer, int code)
+    {
+        int[] arr = null;
+
+        if (mpegVer == AudioFrame.MPEG_V1)
+        {
+            switch (layer)
+            {
+            case AudioFrame.LAYER_1:
+                arr = BIT_RATE_MPEG1_L1;
+                break;
+            case AudioFrame.LAYER_2:
+                arr = BIT_RATE_MPEG1_L2;
+                break;
+            case AudioFrame.LAYER_3:
+                arr = BIT_RATE_MPEG1_L3;
+                break;
+            }
+        }
+        else
+        {
+            if (layer == AudioFrame.LAYER_1)
+            {
+                arr = BIT_RATE_MPEG2_L1;
+            }
+            else
+            {
+                arr = BIT_RATE_MPEG2_L2;
+            }
+        }
+        return arr[code];
+    }
+
+    /**
+     * Calculates the sample rate based on the given parameters.
+     * 
+     * @param mpegVer the MPEG version
+     * @param code the code for the sample rate
+     * @return the sample rate in samples per second
+     */
+    private static int calculateSampleRate(int mpegVer, int code)
+    {
+        return SAMPLE_RATE[mpegVer][code];
+    }
+
+    /**
+     * Calculates the length of an MPEG frame based on the given parameters.
+     * 
+     * @param layer the layer
+     * @param bitRate the bit rate
+     * @param sampleRate the sample rate
+     * @param padding the padding flag
+     * @return the length of the frame in bytes
+     */
+    private static int calculateFrameLength(int layer, int bitRate,
+            int sampleRate, int padding)
+    {
+        if (layer == AudioFrame.LAYER_1)
+        {
+            return (12 * bitRate / sampleRate + padding) * 4;
+        }
+        else
+        {
+            return 144 * bitRate / sampleRate + padding;
+        }
+    }
+
+    /**
+     * Calculates the duration of an MPEG frame based on the given parameters.
+     * 
+     * @param layer the layer
+     * @param sampleRate the sample rate
+     * @return the duration of this frame in milliseconds
+     */
+    private static float calculateDuration(int layer, int sampleRate)
+    {
+        int sampleCount =
+                (layer == AudioFrame.LAYER_1) ? SAMPLE_COUNT_L1
+                        : SAMPLE_COUNT_L2;
+        return (1000.0f / sampleRate) * sampleCount;
+    }
+
+    /**
+     * Calculates the number of channels based on the given parameters.
+     * 
+     * @param chan the code for the channels
+     * @return the number of channels
+     */
+    private static int calculateChannels(int chan)
+    {
+        return chan < 3 ? 2 : 1;
+    }
+
+    /**
+     * Creates the complete array for the sample rate mapping.
+     * 
+     * @return the table for the sample rates
+     */
+    private static int[][] createSampleRateTable()
+    {
+        int[][] arr = new int[4][];
+        arr[AudioFrame.MPEG_V1] = SAMPLE_RATE_MPEG1;
+        arr[AudioFrame.MPEG_V2] = SAMPLE_RATE_MPEG2;
+        arr[AudioFrame.MPEG_V2_5] = SAMPLE_RATE_MPEG2_5;
+        return arr;
+    }
+
+    /**
+     * A class representing the bit field of an MPEG header. It allows
+     * convenient access to specific bit groups.
+     */
+    private static class HeaderBitField
+    {
+        /** The internal value. */
+        private int value;
+
+        /**
+         * Adds a byte to this field.
+         * 
+         * @param b the byte to be added
+         */
+        public void add(int b)
+        {
+            value <<= 8;
+            value |= b;
+        }
+
+        /**
+         * Returns the value of the bit group from the given start and end
+         * index (0-based, counted from the least significant bit). E.g.
+         * {@code from} = 0, {@code to} = 3 returns the lowest 4 bits.
+         * 
+         * @param from the from index
+         * @param to the to index
+         * @return the value of this group of bits
+         */
+        public int get(int from, int to)
+        {
+            int shiftVal = value >> from;
+            int mask = (1 << (to - from + 1)) - 1;
+            return shiftVal & mask;
+        }
+
+        /**
+         * Returns the value of the bit with the given index. The bit index is
+         * 0-based. Result is either 0 or 1, depending on the value of this bit.
+         * 
+         * @param bit the bit index
+         * @return the value of this bit
+         */
+        public int get(int bit)
+        {
+            return get(bit, bit);
+        }
+
+        /**
+         * Returns the internal value of this field as an array. The array
+         * contains 3 bytes.
+         * 
+         * @return the internal value of this field as byte array
+         */
+        public byte[] toArray()
+        {
+            byte[] result = new byte[3];
+            result[0] = (byte) get(16, 23);
+            result[1] = (byte) get(8, 15);
+            result[2] = (byte) get(0, 7);
+            return result;
+        }
+    }
+}

Modified: tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/mp3/Mp3ParserTest.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/mp3/Mp3ParserTest.java?rev=1442402&r1=1442401&r2=1442402&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/mp3/Mp3ParserTest.java (original)
+++ tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/mp3/Mp3ParserTest.java Mon Feb  4 22:24:01 2013
@@ -36,6 +36,16 @@ import org.xml.sax.ContentHandler;
 public class Mp3ParserTest extends TestCase {
 
     /**
+     * Checks the duration of an MP3 file.
+     * @param metadata the metadata object
+     * @param expected the expected duration, rounded to whole seconds
+     */
+    private static void checkDuration(Metadata metadata, int expected) {
+        assertEquals("Wrong duration", expected,
+                Math.round(Float.valueOf(metadata.get(XMPDM.DURATION)) / 1000));
+    }
+
+    /**
      * Test that with only ID3v1 tags, we get some information out   
      */
     public void testMp3ParsingID3v1() throws Exception {
@@ -67,6 +77,7 @@ public class Mp3ParserTest extends TestC
         assertEquals("MPEG 3 Layer III Version 1", metadata.get("version"));
         assertEquals("44100", metadata.get("samplerate"));
         assertEquals("1", metadata.get("channels"));
+        checkDuration(metadata, 2);
     }
 
     /**
@@ -118,6 +129,7 @@ public class Mp3ParserTest extends TestC
         assertEquals("44100", metadata.get(XMPDM.AUDIO_SAMPLE_RATE));
         assertEquals("Mono", metadata.get(XMPDM.AUDIO_CHANNEL_TYPE));
         assertEquals("MP3", metadata.get(XMPDM.AUDIO_COMPRESSOR));
+        checkDuration(metadata, 2);
     }
 
     /**
@@ -153,6 +165,7 @@ public class Mp3ParserTest extends TestC
         assertEquals("MPEG 3 Layer III Version 1", metadata.get("version"));
         assertEquals("44100", metadata.get("samplerate"));
         assertEquals("1", metadata.get("channels"));
+        checkDuration(metadata, 2);
     }
 
     /**
@@ -188,6 +201,7 @@ public class Mp3ParserTest extends TestC
         assertEquals("MPEG 3 Layer III Version 1", metadata.get("version"));
         assertEquals("44100", metadata.get("samplerate"));
         assertEquals("1", metadata.get("channels"));
+        checkDuration(metadata, 2);
     }
     
     /**
@@ -222,6 +236,7 @@ public class Mp3ParserTest extends TestC
        assertEquals("MPEG 3 Layer III Version 1", metadata.get("version"));
        assertEquals("44100", metadata.get("samplerate"));
        assertEquals("1", metadata.get("channels"));
+       checkDuration(metadata, 2);
    }
     
     
@@ -262,6 +277,7 @@ public class Mp3ParserTest extends TestC
         assertEquals("MPEG 3 Layer III Version 1", metadata.get("version"));
         assertEquals("44100", metadata.get("samplerate"));
         assertEquals("2", metadata.get("channels"));
+        checkDuration(metadata, 1);
     }
     
     public void testID3v2Frame() throws Exception {

Added: tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/mp3/MpegStreamTest.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/mp3/MpegStreamTest.java?rev=1442402&view=auto
==============================================================================
--- tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/mp3/MpegStreamTest.java (added)
+++ tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/mp3/MpegStreamTest.java Mon Feb  4 22:24:01 2013
@@ -0,0 +1,165 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.mp3;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertNull;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.OutputStream;
+
+import org.junit.After;
+import org.junit.Test;
+
+/**
+ * Test class for {@code MpegStream}.
+ */
+public class MpegStreamTest
+{
+    /** The stream to be tested. */
+    private MpegStream stream;
+
+    @After
+    public void tearDown() throws Exception
+    {
+        if (stream != null)
+        {
+            stream.close();
+        }
+    }
+
+    /**
+     * Tests whether the default test header can be found in a stream.
+     * 
+     * @param bos the stream
+     * @throws IOException if an error occurs
+     */
+    private void checkDefaultHeader(ByteArrayOutputStream bos)
+            throws IOException
+    {
+        ByteArrayInputStream in = new ByteArrayInputStream(bos.toByteArray());
+        stream = new MpegStream(in);
+        AudioFrame header = stream.nextFrame();
+        assertNotNull("No header found", header);
+        assertEquals("Wrong MPEG version", AudioFrame.MPEG_V2,
+                header.getVersionCode());
+        assertEquals("Wrong layer", AudioFrame.LAYER_3, header.getLayer());
+        assertEquals("Wrong bit rate", 80000, header.getBitRate());
+        assertEquals("Wrong sample rate", 24000, header.getSampleRate());
+    }
+
+    /**
+     * Writes the given byte the given number of times into an output stream.
+     * 
+     * @param out the output stream
+     * @param value the value to write
+     * @param count the number of bytes to write
+     * @throws IOException if an error occurs
+     */
+    private static void writeBytes(OutputStream out, int value, int count)
+            throws IOException
+    {
+        for (int i = 0; i < count; i++)
+        {
+            out.write(value);
+        }
+    }
+
+    /**
+     * Writes a frame header in the given output stream.
+     * 
+     * @param out the output stream
+     * @param b2 byte 2 of the header
+     * @param b3 byte 3 of the header
+     * @param b4 byte 4 of the header
+     * @throws IOException if an error occurs
+     */
+    private static void writeFrame(OutputStream out, int b2, int b3, int b4)
+            throws IOException
+    {
+        out.write(0xFF);
+        out.write(b2);
+        out.write(b3);
+        out.write(b4);
+    }
+
+    /**
+     * Tests whether an audio frame header can be found somewhere in a stream.
+     */
+    @Test
+    public void testSearchNextFrame() throws IOException
+    {
+        ByteArrayOutputStream bos = new ByteArrayOutputStream();
+        writeBytes(bos, 0xFF, 32);
+        writeBytes(bos, 0, 16);
+        writeBytes(bos, 0xFF, 8);
+        bos.write(0xF3);
+        bos.write(0x96);
+        bos.write(0);
+        checkDefaultHeader(bos);
+    }
+
+    /**
+     * Tests whether invalid frame headers are detected and skipped.
+     */
+    @Test
+    public void testSearchNextFrameInvalid() throws IOException
+    {
+        ByteArrayOutputStream bos = new ByteArrayOutputStream();
+        writeFrame(bos, 0xEB, 0x96, 0);
+        writeFrame(bos, 0xF9, 0x96, 0);
+        writeFrame(bos, 0xF3, 0, 0);
+        writeFrame(bos, 0xF3, 0xF0, 0);
+        writeFrame(bos, 0xF3, 0x7C, 0);
+        writeFrame(bos, 0xF3, 0x96, 0);
+        checkDefaultHeader(bos);
+    }
+
+    /**
+     * Tests a search for another frame which is interrupted because the stream
+     * ends.
+     */
+    @Test
+    public void testSeachNextFrameEOS() throws IOException
+    {
+        ByteArrayOutputStream bos = new ByteArrayOutputStream();
+        bos.write(0xFF);
+        bos.write(0xFF);
+        bos.write(0xF3);
+        bos.write(0x96);
+        ByteArrayInputStream in = new ByteArrayInputStream(bos.toByteArray());
+        stream = new MpegStream(in);
+        assertNull("Got a frame", stream.nextFrame());
+    }
+
+    /**
+     * Tries to skip a frame if no current header is available.
+     */
+    @Test
+    public void testSkipNoCurrentHeader() throws IOException
+    {
+        ByteArrayOutputStream bos = new ByteArrayOutputStream();
+        bos.write("This is a test".getBytes());
+        ByteArrayInputStream in = new ByteArrayInputStream(bos.toByteArray());
+        stream = new MpegStream(in);
+        assertFalse("Wrong result", stream.skipFrame());
+    }
+}