You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ni...@apache.org on 2013/02/04 23:24:02 UTC
svn commit: r1442402 - in /tika/trunk: ./
tika-core/src/main/java/org/apache/tika/io/
tika-core/src/test/java/org/apache/tika/io/
tika-parsers/src/main/java/org/apache/tika/parser/mp3/
tika-parsers/src/test/java/org/apache/tika/parser/mp3/
Author: nick
Date: Mon Feb 4 22:24:01 2013
New Revision: 1442402
URL: http://svn.apache.org/viewvc?rev=1442402&view=rev
Log:
Apply patch from Oliver Heger from TIKA-991 - Re-work MP3 parser to capture audio duration by processing more of the audio frames
Added:
tika/trunk/tika-core/src/main/java/org/apache/tika/io/TailStream.java
tika/trunk/tika-core/src/test/java/org/apache/tika/io/TailStreamTest.java
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mp3/MpegStream.java
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/mp3/MpegStreamTest.java
Modified:
tika/trunk/CHANGES.txt
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mp3/AudioFrame.java
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mp3/ID3v2Frame.java
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mp3/Mp3Parser.java
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/mp3/Mp3ParserTest.java
Modified: tika/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/tika/trunk/CHANGES.txt?rev=1442402&r1=1442401&r2=1442402&view=diff
==============================================================================
--- tika/trunk/CHANGES.txt (original)
+++ tika/trunk/CHANGES.txt Mon Feb 4 22:24:01 2013
@@ -6,6 +6,8 @@ Release 1.4 Current Development
* RTF: Ordered and unordered lists are now extracted (TIKA-1062).
+ * MP3: Audio duration is now extracted (TIKA-991).
+
Release 1.3 - 01/19/2013
* Mimetype definitions added for more common programming languages,
Added: tika/trunk/tika-core/src/main/java/org/apache/tika/io/TailStream.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/io/TailStream.java?rev=1442402&view=auto
==============================================================================
--- tika/trunk/tika-core/src/main/java/org/apache/tika/io/TailStream.java (added)
+++ tika/trunk/tika-core/src/main/java/org/apache/tika/io/TailStream.java Mon Feb 4 22:24:01 2013
@@ -0,0 +1,268 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.io;
+
+import java.io.FilterInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+
+/**
+ * <p>
+ * A specialized input stream implementation which records the last portion read
+ * from an underlying stream.
+ * </p>
+ * <p>
+ * This stream implementation is useful to deal with information which is known
+ * to be located at the end of a stream (e.g. ID3 v1 tags). While reading bytes
+ * from the underlying stream, a given number of bytes is kept in an internal
+ * buffer. This buffer can then be queried after the whole stream was read. It
+ * contains the last bytes read from the original input stream.
+ * </p>
+ * <p>
+ * The tail is kept in a ring buffer: {@code currentIndex} is the position the
+ * next byte will be written to, which is also the position of the oldest byte
+ * once the buffer has wrapped around.
+ * </p>
+ */
+public class TailStream extends FilterInputStream
+{
+    /** Constant for the default skip buffer size. */
+    private static final int SKIP_SIZE = 4096;
+
+    /** The buffer in which the tail data is stored. */
+    private final byte[] tailBuffer;
+
+    /** The size of the internal tail buffer. */
+    private final int tailSize;
+
+    /** A copy of the internal tail buffer used for mark() operations. */
+    private byte[] markBuffer;
+
+    /** The number of bytes that have been read so far. */
+    private long bytesRead;
+
+    /** The number of bytes read at the last mark() operation. */
+    private long markBytesRead;
+
+    /** The current index into the tail buffer. */
+    private int currentIndex;
+
+    /** A copy of the current index used for mark() operations. */
+    private int markIndex;
+
+    /**
+     * Creates a new instance of {@code TailStream}.
+     *
+     * @param in the underlying input stream
+     * @param size the size of the tail buffer
+     */
+    public TailStream(InputStream in, int size)
+    {
+        super(in);
+        tailSize = size;
+        tailBuffer = new byte[size];
+    }
+
+    /**
+     * {@inheritDoc} This implementation adds the read byte to the internal tail
+     * buffer.
+     */
+    @Override
+    public int read() throws IOException
+    {
+        int c = super.read();
+        if (c != -1)
+        {
+            appendByte((byte) c);
+        }
+        return c;
+    }
+
+    /**
+     * {@inheritDoc} This implementation forwards to
+     * {@link #read(byte[], int, int)}, which records the data in the tail
+     * buffer.
+     */
+    @Override
+    public int read(byte[] buf) throws IOException
+    {
+        // Do NOT call super.read(buf) and append here as well:
+        // FilterInputStream.read(byte[]) itself invokes the overridden
+        // read(buf, 0, buf.length), so the chunk would be appended twice.
+        return read(buf, 0, buf.length);
+    }
+
+    /**
+     * {@inheritDoc} This implementation delegates to the underlying stream and
+     * then adds the correct portion of the read buffer to the internal tail
+     * buffer.
+     */
+    @Override
+    public int read(byte[] buf, int ofs, int length) throws IOException
+    {
+        int read = super.read(buf, ofs, length);
+        if (read > 0)
+        {
+            appendBuf(buf, ofs, read);
+        }
+        return read;
+    }
+
+    /**
+     * {@inheritDoc} This implementation delegates to the {@code read()} method
+     * to ensure that the tail buffer is also filled if data is skipped. A
+     * non-positive {@code n} skips nothing and returns 0.
+     */
+    @Override
+    public long skip(long n) throws IOException
+    {
+        if (n <= 0)
+        {
+            // a negative count must not be used as an array size
+            return 0;
+        }
+
+        byte[] buf = new byte[(int) Math.min(n, SKIP_SIZE)];
+        long bytesSkipped = 0;
+
+        while (bytesSkipped < n)
+        {
+            int len = (int) Math.min(buf.length, n - bytesSkipped);
+            int count = read(buf, 0, len);
+            if (count == -1)
+            {
+                // end of stream reached before n bytes were skipped
+                break;
+            }
+            bytesSkipped += count;
+        }
+
+        return bytesSkipped;
+    }
+
+    /**
+     * {@inheritDoc} This implementation marks the underlying stream and saves
+     * the internal state including the content of the tail buffer so that it
+     * can be restored when {@code reset()} is called later.
+     */
+    @Override
+    public void mark(int limit)
+    {
+        // the wrapped stream has to remember its position as well, otherwise
+        // a later reset() would restore the tail but not the read position
+        super.mark(limit);
+        markBuffer = new byte[tailSize];
+        System.arraycopy(tailBuffer, 0, markBuffer, 0, tailSize);
+        markIndex = currentIndex;
+        markBytesRead = bytesRead;
+    }
+
+    /**
+     * {@inheritDoc} This implementation resets the underlying stream and
+     * restores this stream's state to the state when {@code mark()} was called
+     * the last time. If {@code mark()} has not been called before, this method
+     * has no effect.
+     *
+     * @throws IOException if the underlying stream cannot be reset
+     */
+    @Override
+    public void reset() throws IOException
+    {
+        if (markBuffer != null)
+        {
+            // reposition the wrapped stream first; if that fails the tail
+            // state is left untouched
+            super.reset();
+            System.arraycopy(markBuffer, 0, tailBuffer, 0, tailSize);
+            currentIndex = markIndex;
+            bytesRead = markBytesRead;
+        }
+    }
+
+    /**
+     * Returns an array with the last data read from the underlying stream. If
+     * the underlying stream contained more data than the {@code tailSize}
+     * constructor argument, the returned array has a length of
+     * {@code tailSize}. Otherwise, its length equals the number of bytes read.
+     *
+     * @return an array with the last data read from the underlying stream
+     */
+    public byte[] getTail()
+    {
+        int size = (int) Math.min(tailSize, bytesRead);
+        byte[] result = new byte[size];
+        // older part: from the write position up to the end of the ring
+        int olderLength = size - currentIndex;
+        System.arraycopy(tailBuffer, currentIndex, result, 0, olderLength);
+        // newer part: from the start of the ring up to the write position
+        System.arraycopy(tailBuffer, 0, result, olderLength, currentIndex);
+        return result;
+    }
+
+    /**
+     * Adds the given byte to the internal tail buffer.
+     *
+     * @param b the byte to be added
+     */
+    private void appendByte(byte b)
+    {
+        // a tail size of 0 records nothing but must still count the byte
+        if (tailSize > 0)
+        {
+            tailBuffer[currentIndex++] = b;
+            if (currentIndex >= tailSize)
+            {
+                currentIndex = 0;
+            }
+        }
+        bytesRead++;
+    }
+
+    /**
+     * Adds the content of the given buffer to the internal tail buffer.
+     *
+     * @param buf the buffer
+     * @param ofs the start offset in the buffer
+     * @param length the number of bytes to be copied
+     */
+    private void appendBuf(byte[] buf, int ofs, int length)
+    {
+        if (length >= tailSize)
+        {
+            replaceTailBuffer(buf, ofs, length);
+        }
+        else
+        {
+            copyToTailBuffer(buf, ofs, length);
+        }
+
+        bytesRead += length;
+    }
+
+    /**
+     * Replaces the content of the internal tail buffer by the last portion of
+     * the given buffer. This method is called if a buffer was read from the
+     * underlying stream whose length is at least the size of the tail buffer.
+     *
+     * @param buf the buffer
+     * @param ofs the start offset in the buffer
+     * @param length the number of bytes to be copied
+     */
+    private void replaceTailBuffer(byte[] buf, int ofs, int length)
+    {
+        System.arraycopy(buf, ofs + length - tailSize, tailBuffer, 0, tailSize);
+        currentIndex = 0;
+    }
+
+    /**
+     * Copies the given buffer into the internal tail buffer at the current
+     * position. This method is called if a buffer is read from the underlying
+     * stream whose length is smaller than the tail buffer. In this case the
+     * tail buffer is only partly overwritten.
+     *
+     * @param buf the buffer
+     * @param ofs the start offset in the buffer
+     * @param length the number of bytes to be copied
+     */
+    private void copyToTailBuffer(byte[] buf, int ofs, int length)
+    {
+        int remaining = tailSize - currentIndex;
+        // first part fills the ring up to its end, the rest wraps to index 0
+        int size1 = Math.min(remaining, length);
+        System.arraycopy(buf, ofs, tailBuffer, currentIndex, size1);
+        System.arraycopy(buf, ofs + size1, tailBuffer, 0, length - size1);
+        currentIndex = (currentIndex + length) % tailSize;
+    }
+}
Added: tika/trunk/tika-core/src/test/java/org/apache/tika/io/TailStreamTest.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/test/java/org/apache/tika/io/TailStreamTest.java?rev=1442402&view=auto
==============================================================================
--- tika/trunk/tika-core/src/test/java/org/apache/tika/io/TailStreamTest.java (added)
+++ tika/trunk/tika-core/src/test/java/org/apache/tika/io/TailStreamTest.java Mon Feb 4 22:24:01 2013
@@ -0,0 +1,201 @@
+package org.apache.tika.io;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Arrays;
+
+import org.junit.Test;
+
+/**
+ * Test class for {@code TailStream}.
+ */
+public class TailStreamTest
+{
+    /** Constant for generating test text. */
+    private static final String TEXT =
+        "Lorem ipsum dolor sit amet, consetetur "
+            + "sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut "
+            + "labore et dolore magna aliquyam erat, sed diam voluptua. At vero"
+            + " eos et accusam et justo duo dolores et ea rebum. Stet clita "
+            + "kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor "
+            + "sit amet.";
+
+    /**
+     * Generates a test text using the specified parameters. The text is built
+     * by repeating {@code TEXT} until it is long enough and then cutting out
+     * the requested range.
+     *
+     * @param from the start index of the text
+     * @param length the length of the text
+     * @return the generated test text
+     */
+    private static String generateText(int from, int length)
+    {
+        int count = from + length;
+        StringBuilder buf = new StringBuilder(count);
+        while (buf.length() < count)
+        {
+            buf.append(TEXT);
+        }
+        return buf.substring(from, from + length);
+    }
+
+    /**
+     * Generates a stream which contains a test text.
+     *
+     * @param from the start index of the text
+     * @param length the length of the generated stream
+     * @return the stream with the test text
+     */
+    private static InputStream generateStream(int from, int length)
+    {
+        return new ByteArrayInputStream(generateText(from, length).getBytes());
+    }
+
+    /**
+     * Helper method for reading the content of an input stream byte by byte.
+     *
+     * @param in the stream to be read
+     * @return an array with the content of the stream
+     * @throws IOException if an error occurs
+     */
+    private static byte[] readStream(InputStream in) throws IOException
+    {
+        ByteArrayOutputStream bos = new ByteArrayOutputStream();
+        int c;
+        while ((c = in.read()) != -1)
+        {
+            bos.write(c);
+        }
+        return bos.toByteArray();
+    }
+
+    /**
+     * Tests whether the tail buffer can be obtained before data was read.
+     */
+    @Test
+    public void testTailBeforeRead() throws IOException
+    {
+        TailStream stream = new TailStream(generateStream(0, 100), 50);
+        assertEquals("Wrong buffer length", 0, stream.getTail().length);
+        stream.close();
+    }
+
+    /**
+     * Tests the content of the tail buffer if it is only partly filled.
+     */
+    @Test
+    public void testTailBufferPartlyRead() throws IOException
+    {
+        final int count = 64;
+        TailStream stream = new TailStream(generateStream(0, count), 2 * count);
+        byte[] data = readStream(stream);
+        assertTrue("Wrong content", Arrays.equals(data, stream.getTail()));
+        stream.close();
+    }
+
+    /**
+     * Tests the content of the tail buffer if only single bytes were read.
+     */
+    @Test
+    public void testTailSingleByteReads() throws IOException
+    {
+        final int count = 128;
+        TailStream stream = new TailStream(generateStream(0, 2 * count), count);
+        readStream(stream);
+        assertEquals("Wrong buffer", generateText(count, count), new String(
+                stream.getTail()));
+        // closed like in the other tests of this class
+        stream.close();
+    }
+
+    /**
+     * Tests the content of the tail buffer if larger chunks are read.
+     */
+    @Test
+    public void testTailChunkReads() throws IOException
+    {
+        final int count = 16384;
+        final int tailSize = 61;
+        final int bufSize = 100;
+        TailStream stream = new TailStream(generateStream(0, count), tailSize);
+        byte[] buf = new byte[bufSize];
+        int read = stream.read(buf, 10, 8);
+        assertEquals("Wrong number of bytes read", 8, read);
+        while (read != -1)
+        {
+            read = stream.read(buf);
+        }
+        assertEquals("Wrong buffer", generateText(count - tailSize, tailSize),
+                new String(stream.getTail()));
+        stream.close();
+    }
+
+    /**
+     * Tests whether mark() and reset() work as expected.
+     */
+    @Test
+    public void testReadWithMarkAndReset() throws IOException
+    {
+        final int tailSize = 64;
+        TailStream stream =
+                new TailStream(generateStream(0, 2 * tailSize), tailSize);
+        byte[] buf = new byte[tailSize / 2];
+        stream.read(buf);
+        stream.mark(tailSize);
+        stream.read(buf);
+        stream.reset();
+        readStream(stream);
+        assertEquals("Wrong buffer", generateText(tailSize, tailSize),
+                new String(stream.getTail()));
+        // closed like in the other tests of this class
+        stream.close();
+    }
+
+    /**
+     * Tests whether a reset() operation without a mark is simply ignored.
+     */
+    @Test
+    public void testResetWithoutMark() throws IOException
+    {
+        final int tailSize = 75;
+        final int count = 128;
+        TailStream stream = new TailStream(generateStream(0, count), tailSize);
+        stream.reset();
+        byte[] buf = new byte[count];
+        stream.read(buf);
+        assertEquals("Wrong buffer", generateText(count - tailSize, tailSize),
+                new String(stream.getTail()));
+        stream.close();
+    }
+
+    /**
+     * Tests whether skip() also fills the tail buffer.
+     */
+    @Test
+    public void testSkip() throws IOException
+    {
+        final int tailSize = 128;
+        final int count = 1024;
+        final int skipCount = 512;
+        TailStream stream = new TailStream(generateStream(0, count), tailSize);
+        assertEquals("Wrong skip result", skipCount, stream.skip(skipCount));
+        assertEquals("Wrong buffer",
+                generateText(skipCount - tailSize, tailSize),
+                new String(stream.getTail()));
+        stream.close();
+    }
+
+    /**
+     * Tests a skip operation at the end of the stream.
+     */
+    @Test
+    public void testSkipEOS() throws IOException
+    {
+        final int count = 128;
+        TailStream stream = new TailStream(generateStream(0, count), 2 * count);
+        assertEquals("Wrong skip result", count, stream.skip(2 * count));
+        assertEquals("Wrong buffer", generateText(0, count),
+                new String(stream.getTail()));
+        stream.close();
+    }
+}
Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mp3/AudioFrame.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mp3/AudioFrame.java?rev=1442402&r1=1442401&r2=1442402&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mp3/AudioFrame.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mp3/AudioFrame.java Mon Feb 4 22:24:01 2013
@@ -28,9 +28,32 @@ import org.xml.sax.SAXException;
* Currently, only the header is processed, not the raw audio data.
*/
public class AudioFrame implements MP3Frame {
- private String version;
- private int sampleRate;
- private int channels;
+ /** Constant for the MPEG version 1. */
+ public static final int MPEG_V1 = 3;
+
+ /** Constant for the MPEG version 2. */
+ public static final int MPEG_V2 = 2;
+
+ /** Constant for the MPEG version 2.5. */
+ public static final int MPEG_V2_5 = 0;
+
+ /** Constant for audio layer 1. */
+ public static final int LAYER_1 = 3;
+
+ /** Constant for audio layer 2. */
+ public static final int LAYER_2 = 2;
+
+ /** Constant for audio layer 3. */
+ public static final int LAYER_3 = 1;
+
+ private final String version;
+ private final int versionCode;
+ private final int layer;
+ private final int sampleRate;
+ private final int channels;
+ private final int bitRate;
+ private final int length;
+ private final float duration;
public String getVersion() {
return version;
@@ -51,6 +74,51 @@ public class AudioFrame implements MP3Fr
}
/**
+ * Get the version code.
+ * @return the version code (one of the {@code MPEG} constants)
+ */
+ public int getVersionCode()
+ {
+     return this.versionCode;
+ }
+
+ /**
+  * Returns the code of the audio layer of this frame.
+  * @return the audio layer (one of the {@code LAYER} constants)
+  */
+ public int getLayer()
+ {
+     return this.layer;
+ }
+
+ /**
+  * Returns the bit rate of this frame in bits per second.
+  * @return the bit rate
+  */
+ public int getBitRate()
+ {
+     return this.bitRate;
+ }
+
+ /**
+  * Returns the length of this frame in bytes.
+  * @return the frame length
+  */
+ public int getLength()
+ {
+     return this.length;
+ }
+
+ /**
+  * Returns the duration of this frame in milliseconds.
+  * @return the duration
+  */
+ public float getDuration()
+ {
+     return this.duration;
+ }
+
+ /**
* Does this appear to be a 4 byte audio frame header?
*/
public static boolean isAudioHeader(int h1, int h2, int h3, int h4) {
@@ -65,12 +133,19 @@ public class AudioFrame implements MP3Fr
return false;
}
-
+ /**
+ * @deprecated Use the constructor which is passed all values directly.
+ */
+ @Deprecated
public AudioFrame(InputStream stream, ContentHandler handler)
throws IOException, SAXException, TikaException {
this(-2, -2, -2, -2, stream);
}
+ /**
+ * @deprecated Use the constructor which is passed all values directly.
+ */
+ @Deprecated
public AudioFrame(int h1, int h2, int h3, int h4, InputStream in)
throws IOException {
if (h1 == -2 && h2 == -2 && h3 == -2 && h4 == -2) {
@@ -81,46 +156,28 @@ public class AudioFrame implements MP3Fr
}
if (isAudioHeader(h1, h2, h3, h4)) {
- version = "MPEG 3 Layer ";
- int layer = (h2 >> 1) & 0x03;
- if (layer == 1) {
- version += "III";
- } else if (layer == 2) {
- version += "II";
- } else if (layer == 3) {
- version += "I";
- } else {
- version += "(reserved)";
- }
-
- version += " Version ";
- int ver = (h2 >> 3) & 0x03;
- if (ver == 0) {
- version += "2.5";
- } else if(ver == 2) {
- version += "2";
- } else if(ver == 3) {
- version += "1";
- } else {
- version += "(reseved)";
- }
-
- int rate = (h3 >> 2) & 0x03;
- switch (rate) {
+ layer = (h2 >> 1) & 0x03;
+ versionCode = (h2 >> 3) & 0x03;
+ version = generateVersionStr(versionCode, layer);
+
+ int rateCode = (h3 >> 2) & 0x03;
+ int rate;
+ switch (rateCode) {
case 0:
- sampleRate = 11025;
+ rate = 11025;
break;
case 1:
- sampleRate = 12000;
+ rate = 12000;
break;
default:
- sampleRate = 8000;
+ rate = 8000;
}
- if (ver == 2) {
- sampleRate *= 2;
- } else if(ver == 3) {
- sampleRate *= 4;
+ if (versionCode == MPEG_V2) {
+ rate *= 2;
+ } else if(versionCode == MPEG_V1) {
+ rate *= 4;
}
+ sampleRate = rate;
int chans = h4 & 0x192;
if (chans < 3) {
@@ -129,9 +186,67 @@ public class AudioFrame implements MP3Fr
} else {
channels = 1;
}
+ bitRate = 0;
+ duration = 0;
+ length = 0;
} else {
throw new IllegalArgumentException("Magic Audio Frame Header not found");
}
}
+
+ /**
+  * Creates a fully initialized {@code AudioFrame}; every property is taken
+  * directly from the arguments, nothing is read from a stream. The version
+  * string is derived from the version and layer codes.
+  *
+  * @param mpegVersion the code for the MPEG version
+  * @param layer the code for the layer
+  * @param bitRate the bit rate (in bps)
+  * @param sampleRate the sample rate (in samples per second)
+  * @param channels the number of channels
+  * @param length the frame length (in bytes)
+  * @param duration the duration of this frame (in milliseconds)
+  */
+ public AudioFrame(int mpegVersion, int layer, int bitRate, int sampleRate,
+         int channels, int length, float duration) {
+     // version information
+     this.versionCode = mpegVersion;
+     this.layer = layer;
+     this.version = generateVersionStr(mpegVersion, layer);
+
+     // audio properties
+     this.sampleRate = sampleRate;
+     this.channels = channels;
+     this.bitRate = bitRate;
+
+     // frame extent
+     this.length = length;
+     this.duration = duration;
+ }
+ /**
+  * Generates a string for the version of this audio frame. The result has
+  * the form {@code "MPEG 3 Layer <layer> Version <version>"}; codes that
+  * match none of the {@code LAYER} / {@code MPEG} constants are rendered as
+  * {@code "(reserved)"}.
+  *
+  * @param version the code for the MPEG version
+  * @param layer the code for the layer
+  * @return a string for the version
+  */
+ private static String generateVersionStr(int version, int layer) {
+     StringBuilder buf = new StringBuilder(64);
+     buf.append("MPEG 3 Layer ");
+     if (layer == LAYER_3) {
+         buf.append("III");
+     } else if (layer == LAYER_2) {
+         buf.append("II");
+     } else if (layer == LAYER_1) {
+         buf.append("I");
+     } else {
+         buf.append("(reserved)");
+     }
+
+     buf.append(" Version ");
+     if (version == MPEG_V2_5) {
+         buf.append("2.5");
+     } else if (version == MPEG_V2) {
+         buf.append("2");
+     } else if (version == MPEG_V1) {
+         buf.append("1");
+     } else {
+         // spelled like the layer branch above (was "(reseved)")
+         buf.append("(reserved)");
+     }
+
+     return buf.toString();
+ }
}
Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mp3/ID3v2Frame.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mp3/ID3v2Frame.java?rev=1442402&r1=1442401&r2=1442402&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mp3/ID3v2Frame.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mp3/ID3v2Frame.java Mon Feb 4 22:24:01 2013
@@ -18,6 +18,7 @@ package org.apache.tika.parser.mp3;
import java.io.IOException;
import java.io.InputStream;
+import java.io.PushbackInputStream;
import java.io.UnsupportedEncodingException;
import java.util.Iterator;
@@ -61,10 +62,12 @@ public class ID3v2Frame implements MP3Fr
}
/**
- * Returns the next Frame (ID3v2 or Audio) in
+ * Returns the next ID3v2 Frame in
* the file, or null if the next batch of data
- * doesn't correspond to either an ID3v2 Frame
- * or an Audio Frame.
+ * doesn't correspond to an ID3v2 header.
+ * If no ID3v2 frame could be detected and the passed in input stream is a
+ * {@code PushbackInputStream}, the bytes read so far are pushed back so
+ * that they can be read again.
* ID3v2 Frames should come before all Audio ones.
*/
public static MP3Frame createFrameIfPresent(InputStream inp)
@@ -78,21 +81,39 @@ public class ID3v2Frame implements MP3Fr
int majorVersion = inp.read();
int minorVersion = inp.read();
if (majorVersion == -1 || minorVersion == -1) {
+ pushBack(inp, h1, h2, h3, majorVersion, minorVersion);
return null;
}
return new ID3v2Frame(majorVersion, minorVersion, inp);
}
-
- // Is it an Audio Frame?
- int h4 = inp.read();
- if (AudioFrame.isAudioHeader(h1, h2, h3, h4)) {
- return new AudioFrame(h1, h2, h3, h4, inp);
- }
-
+
// Not a frame header
+ pushBack(inp, h1, h2, h3);
return null;
}
+ /**
+  * Pushes bytes back into the stream if possible. This method is called if
+  * no ID3v2 header could be found at the current stream position. Nothing
+  * happens if the stream is not a {@code PushbackInputStream}.
+  * <p>
+  * Only real data is pushed back: the values are expected to be results of
+  * {@code read()} in the order they were obtained, so the first {@code -1}
+  * (end of stream) and everything after it are dropped. Casting {@code -1}
+  * to a byte would otherwise put a {@code 0xFF} byte into the stream which
+  * was never part of the data.
+  * </p>
+  *
+  * @param inp the input stream
+  * @param bytes the bytes to be pushed back
+  * @throws IOException if an error occurs
+  */
+ private static void pushBack(InputStream inp, int... bytes)
+         throws IOException
+ {
+     if (inp instanceof PushbackInputStream)
+     {
+         // count the values read before the end of the stream was hit
+         int count = 0;
+         while (count < bytes.length && bytes[count] != -1)
+         {
+             count++;
+         }
+
+         if (count > 0)
+         {
+             byte[] buf = new byte[count];
+             for (int i = 0; i < count; i++)
+             {
+                 buf[i] = (byte) bytes[i];
+             }
+             ((PushbackInputStream) inp).unread(buf);
+         }
+     }
+ }
+
private ID3v2Frame(int majorVersion, int minorVersion, InputStream inp)
throws IOException {
this.majorVersion = majorVersion;
Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mp3/Mp3Parser.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mp3/Mp3Parser.java?rev=1442402&r1=1442401&r2=1442402&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mp3/Mp3Parser.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mp3/Mp3Parser.java Mon Feb 4 22:24:01 2013
@@ -24,6 +24,7 @@ import java.util.List;
import java.util.Set;
import org.apache.tika.exception.TikaException;
+import org.apache.tika.io.TailStream;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.metadata.XMPDM;
@@ -79,7 +80,8 @@ public class Mp3Parser extends AbstractP
metadata.set(XMPDM.ALBUM, tag.getAlbum());
metadata.set(XMPDM.RELEASE_DATE, tag.getYear());
metadata.set(XMPDM.GENRE, tag.getGenre());
-
+ metadata.set(XMPDM.DURATION, audioAndTags.duration);
+
List<String> comments = new ArrayList<String>();
for (ID3Comment comment : tag.getComments()) {
StringBuffer cmt = new StringBuffer();
@@ -113,6 +115,7 @@ public class Mp3Parser extends AbstractP
}
xhtml.element("p", tag.getYear());
xhtml.element("p", tag.getGenre());
+ xhtml.element("p", String.valueOf(audioAndTags.duration));
for (String comment : comments) {
xhtml.element("p", comment);
}
@@ -157,11 +160,14 @@ public class Mp3Parser extends AbstractP
LyricsHandler lyrics = null;
AudioFrame firstAudio = null;
+ TailStream tailStream = new TailStream(stream, 10240+128);
+ MpegStream mpegStream = new MpegStream(tailStream);
+
// ID3v2 tags live at the start of the file
// You can apparently have several different ID3 tag blocks
// So, keep going until we don't find any more
MP3Frame f;
- while ((f = ID3v2Frame.createFrameIfPresent(stream)) != null && firstAudio == null) {
+ while ((f = ID3v2Frame.createFrameIfPresent(mpegStream)) != null) {
if(f instanceof ID3v2Frame) {
ID3v2Frame id3F = (ID3v2Frame)f;
if (id3F.getMajorVersion() == 4) {
@@ -171,15 +177,27 @@ public class Mp3Parser extends AbstractP
} else if(id3F.getMajorVersion() == 2) {
v22 = new ID3v22Handler(id3F);
}
- } else if(f instanceof AudioFrame) {
- firstAudio = (AudioFrame)f;
}
}
+ // Now iterate over all audio frames in the file
+ AudioFrame frame = mpegStream.nextFrame();
+ float duration = 0;
+ while (frame != null)
+ {
+ duration += frame.getDuration();
+ if (firstAudio == null)
+ {
+ firstAudio = frame;
+ }
+ mpegStream.skipFrame();
+ frame = mpegStream.nextFrame();
+ }
+
// ID3v1 tags live at the end of the file
// Lyrics live just before ID3v1, at the end of the file
// Search for both (handlers seek to the end for us)
- lyrics = new LyricsHandler(stream, handler);
+ lyrics = new LyricsHandler(tailStream.getTail());
v1 = lyrics.id3v1;
// Go in order of preference
@@ -203,6 +221,7 @@ public class Mp3Parser extends AbstractP
ret.audio = firstAudio;
ret.lyrics = lyrics;
ret.tags = tags.toArray(new ID3Tags[tags.size()]);
+ ret.duration = duration;
return ret;
}
@@ -210,6 +229,7 @@ public class Mp3Parser extends AbstractP
private ID3Tags[] tags;
private AudioFrame audio;
private LyricsHandler lyrics;
+ private float duration;
}
}
Added: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mp3/MpegStream.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mp3/MpegStream.java?rev=1442402&view=auto
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mp3/MpegStream.java (added)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mp3/MpegStream.java Mon Feb 4 22:24:01 2013
@@ -0,0 +1,469 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.mp3;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.PushbackInputStream;
+
+/**
+ * <p>
+ * A specialized stream class which can be used to extract single frames of MPEG
+ * audio files.
+ * </p>
+ * <p>
+ * Instances of this class are constructed with an underlying stream which
+ * should point to an audio file. Read operations are possible in the usual way.
+ * However, there are special methods for searching and extracting headers of
+ * MPEG frames. Some meta information of frames can be queried.
+ * </p>
+ */
+class MpegStream extends PushbackInputStream
+{
+ /** Bit rates in bits per second for MPEG V1, layer 1, indexed by bit rate code. */
+ private static final int[] BIT_RATE_MPEG1_L1 = {
+ 0, 32000, 64000, 96000, 128000, 160000, 192000, 224000, 256000,
+ 288000, 320000, 352000, 384000, 416000, 448000
+ };
+
+ /** Bit rates in bits per second for MPEG V1, layer 2, indexed by bit rate code. */
+ private static final int[] BIT_RATE_MPEG1_L2 = {
+ 0, 32000, 48000, 56000, 64000, 80000, 96000, 112000, 128000,
+ 160000, 192000, 224000, 256000, 320000, 384000
+ };
+
+ /** Bit rates in bits per second for MPEG V1, layer 3, indexed by bit rate code. */
+ private static final int[] BIT_RATE_MPEG1_L3 = {
+ 0, 32000, 40000, 48000, 56000, 64000, 80000, 96000, 112000, 128000,
+ 160000, 192000, 224000, 256000, 320000
+ };
+
+ /** Bit rates in bits per second for MPEG V2/V2.5, layer 1, indexed by bit rate code. */
+ private static final int[] BIT_RATE_MPEG2_L1 = {
+ 0, 32000, 48000, 56000, 64000, 80000, 96000, 112000, 128000,
+ 144000, 160000, 176000, 192000, 224000, 256000
+ };
+
+ /** Bit rates in bits per second for MPEG V2/V2.5, layer 2 and 3, indexed by bit rate code. */
+ private static final int[] BIT_RATE_MPEG2_L2 = {
+ 0, 8000, 16000, 24000, 32000, 40000, 48000, 56000, 64000, 80000,
+ 96000, 112000, 128000, 144000, 160000
+ };
+
+ /** Sample rates in samples per second for MPEG V1, indexed by sample rate code. */
+ private static final int[] SAMPLE_RATE_MPEG1 = {
+ 44100, 48000, 32000
+ };
+
+ /** Sample rates in samples per second for MPEG V2, indexed by sample rate code. */
+ private static final int[] SAMPLE_RATE_MPEG2 = {
+ 22050, 24000, 16000
+ };
+
+ /** Sample rates in samples per second for MPEG V2.5, indexed by sample rate code. */
+ private static final int[] SAMPLE_RATE_MPEG2_5 = {
+ 11025, 12000, 8000
+ };
+
+ /**
+ * Sample rate tables for all MPEG versions, indexed first by the MPEG
+ * version code and then by the sample rate code.
+ */
+ private static final int[][] SAMPLE_RATE = createSampleRateTable();
+
+ /** Constant for the number of samples for a layer 1 frame. */
+ private static final int SAMPLE_COUNT_L1 = 384;
+
+ /** Constant for the number of samples for a layer 2 or 3 frame. */
+ private static final int SAMPLE_COUNT_L2 = 1152;
+
+ /** Constant for the size of an MPEG frame header in bytes (sync byte included). */
+ private static final int HEADER_SIZE = 4;
+
+ /**
+ * The header found by the last call to {@code nextFrame()}; <b>null</b> if
+ * no header was found or the frame has already been skipped.
+ */
+ private AudioFrame currentHeader;
+
+ /** A flag which is set as soon as a read operation hits the end of the stream. */
+ private boolean endOfStream;
+
+ /**
+ * Creates a new instance of {@code MpegStream} and initializes it with the
+ * underlying stream. The push-back buffer is sized to twice the length of
+ * a frame header so that rejected header bytes can be unread.
+ *
+ * @param in the underlying audio stream
+ */
+ public MpegStream(InputStream in)
+ {
+ super(in, 2 * HEADER_SIZE);
+ }
+
+ /**
+ * Searches for the next MPEG frame header from the current stream position
+ * on. This method advances the underlying input stream until it finds a
+ * valid frame header or the end of the stream is reached. In the former
+ * case a corresponding {@code AudioFrame} object is created. In the latter
+ * case there are no more headers, so the end of the stream is probably
+ * reached.
+ * <p>
+ * If the three bytes following a sync byte do not form a valid header,
+ * they are pushed back so that the search resumes directly after that
+ * sync byte. The result (also a <b>null</b> result) is remembered as the
+ * current header which is used by {@code skipFrame()}. On success the
+ * stream is positioned directly after the 4-byte header.
+ * </p>
+ *
+ * @return the next {@code AudioFrame} or <b>null</b>
+ * @throws IOException if an IO error occurs
+ */
+ public AudioFrame nextFrame() throws IOException
+ {
+ AudioFrame frame = null;
+ while (!endOfStream && frame == null)
+ {
+ findFrameSyncByte();
+ if (!endOfStream)
+ {
+ HeaderBitField headerField = createHeaderField();
+ if (!endOfStream)
+ {
+ frame = createHeader(headerField);
+ if (frame == null)
+ {
+ pushBack(headerField);
+ }
+ }
+ }
+ }
+
+ currentHeader = frame;
+ return frame;
+ }
+
+ /**
+ * Skips the current MPEG frame. This method can be called after a valid
+ * MPEG header has been retrieved using {@code nextFrame()}. In this case
+ * the underlying stream is advanced to the end of the associated MPEG
+ * frame. Otherwise, this method has no effect. The return value indicates
+ * whether a frame could be skipped.
+ * <p>
+ * The number of bytes skipped is the frame length minus the header size,
+ * because the header has already been consumed. The skip is performed on
+ * the wrapped stream directly, i.e. it bypasses the push-back buffer; this
+ * relies on the buffer being empty after a valid header was read (bytes
+ * are only pushed back for rejected headers). The current header is
+ * cleared, so a second call without a new {@code nextFrame()} returns
+ * <b>false</b>.
+ * </p>
+ *
+ * @return <b>true</b> if a frame could be skipped, <b>false</b> otherwise
+ * @throws IOException if an IO error occurs
+ */
+ public boolean skipFrame() throws IOException
+ {
+ if (currentHeader != null)
+ {
+ skipStream(in, currentHeader.getLength() - HEADER_SIZE);
+ currentHeader = null;
+ return true;
+ }
+ return false;
+ }
+
+ /**
+ * Advances the underlying stream until the first byte of frame sync is
+ * found. Bytes are consumed one by one until a byte with the value
+ * {@code 0xFF} has been read (the sync byte itself is consumed as well) or
+ * the end of the stream is reached, which is signalled through the
+ * {@code endOfStream} flag.
+ *
+ * @throws IOException if an error occurs
+ */
+ private void findFrameSyncByte() throws IOException
+ {
+ boolean found = false;
+ while (!found && !endOfStream)
+ {
+ if (nextByte() == 0xFF)
+ {
+ found = true;
+ }
+ }
+ }
+
+ /**
+ * Creates a bit field for the MPEG frame header. The field is filled with
+ * the next three bytes of the stream, i.e. the bytes following the sync
+ * byte, the first byte read becoming the most significant one. If the end
+ * of the stream is hit while reading, zeros are added for the missing
+ * bytes; callers have to check the {@code endOfStream} flag before using
+ * the result.
+ *
+ * @return the bit field
+ * @throws IOException if an error occurs
+ */
+ private HeaderBitField createHeaderField() throws IOException
+ {
+ HeaderBitField field = new HeaderBitField();
+ field.add(nextByte());
+ field.add(nextByte());
+ field.add(nextByte());
+ return field;
+ }
+
+ /**
+ * Creates an {@code AudioFrame} object based on the given header field. If
+ * the header field contains invalid values, result is <b>null</b>.
+ * <p>
+ * The 24 bits of the field (bit 0 being the least significant one) are
+ * interpreted as follows: bits 21-23 are the remaining sync bits and must
+ * all be set; bits 19-20 are the MPEG version code; bits 17-18 the layer
+ * code; bits 12-15 the bit rate code; bits 10-11 the sample rate code; bit
+ * 9 is the padding flag; bits 6-7 are the channel code. A header is
+ * rejected if the version code is 1, the layer code is 0, the bit rate
+ * code is 0 or 15, or the sample rate code is 3.
+ * </p>
+ *
+ * @param bits the header bit field
+ * @return the {@code AudioFrame} or <b>null</b> for an invalid header
+ */
+ private AudioFrame createHeader(HeaderBitField bits)
+ {
+ if (bits.get(21, 23) != 7)
+ {
+ return null;
+ }
+
+ int mpegVer = bits.get(19, 20);
+ int layer = bits.get(17, 18);
+ int bitRateCode = bits.get(12, 15);
+ int sampleRateCode = bits.get(10, 11);
+ int padding = bits.get(9);
+
+ if (mpegVer == 1 || layer == 0 || bitRateCode == 0 || bitRateCode == 15
+ || sampleRateCode == 3)
+ {
+ // invalid header values
+ return null;
+ }
+
+ int bitRate = calculateBitRate(mpegVer, layer, bitRateCode);
+ int sampleRate = calculateSampleRate(mpegVer, sampleRateCode);
+ int length = calculateFrameLength(layer, bitRate, sampleRate, padding);
+ float duration = calculateDuration(layer, sampleRate);
+ int channels = calculateChannels(bits.get(6, 7));
+ return new AudioFrame(mpegVer, layer, bitRate, sampleRate, channels,
+ length, duration);
+ }
+
+ /**
+ * Reads the next byte. If the end of the stream is reached (now or by an
+ * earlier call), the {@code endOfStream} flag is set and 0 is returned
+ * instead of -1, so the result alone does not tell a real zero byte from
+ * the end of the stream.
+ *
+ * @return the next byte (0-255), or 0 if the end of the stream is reached
+ * @throws IOException if an error occurs
+ */
+ private int nextByte() throws IOException
+ {
+ int result = 0;
+ if (!endOfStream)
+ {
+ result = read();
+ if (result == -1)
+ {
+ endOfStream = true;
+ }
+ }
+ return endOfStream ? 0 : result;
+ }
+
+ /**
+ * Pushes the given header field back in the stream so that the bytes are
+ * read again. This method is called if an invalid header was detected. Then
+ * search has to continue at the next byte after the frame sync byte. Only
+ * the three bytes stored in the field are unread; the sync byte which
+ * preceded them stays consumed.
+ *
+ * @param field the header bit field with the invalid frame header
+ * @throws IOException if an error occurs
+ */
+ private void pushBack(HeaderBitField field) throws IOException
+ {
+ unread(field.toArray());
+ }
+
+ /**
+ * Skips the given number of bytes from the specified input stream. As a
+ * single call of {@code skip()} may skip fewer bytes than requested, it is
+ * called repeatedly until the full count has been skipped or a negative
+ * value is returned. A count of zero or less skips nothing.
+ * <p>
+ * NOTE(review): the loop only terminates early on a negative return
+ * value. If the stream keeps returning 0 from {@code skip()} (which
+ * {@code InputStream.skip()} is allowed to do, e.g. at the end of the
+ * stream) while bytes remain to be skipped, the loop does not end -
+ * confirm that the streams passed in never behave this way for truncated
+ * files.
+ * </p>
+ *
+ * @param in the input stream
+ * @param count the number of bytes to skip
+ * @throws IOException if an IO error occurs
+ */
+ private static void skipStream(InputStream in, long count)
+ throws IOException
+ {
+ long size = count;
+ long skipped = 0;
+ while (size > 0 && skipped >= 0)
+ {
+ skipped = in.skip(size);
+ if (skipped != -1)
+ {
+ size -= skipped;
+ }
+ }
+ }
+
+ /**
+ * Calculates the bit rate based on the given parameters. The matching bit
+ * rate table is selected from the version and the layer: MPEG V1 has one
+ * table per layer, all other versions share one table for layer 1 and one
+ * for the remaining layers. The code is then used as index into this
+ * table.
+ * <p>
+ * No validation is done here: for MPEG V1 a layer matching none of the
+ * three layer constants leaves the table unset, and a code outside of the
+ * table causes an index error. The caller is expected to have rejected
+ * such header values before.
+ * </p>
+ *
+ * @param mpegVer the MPEG version
+ * @param layer the layer
+ * @param code the code for the bit rate
+ * @return the bit rate in bits per second
+ */
+ private static int calculateBitRate(int mpegVer, int layer, int code)
+ {
+ int[] arr = null;
+
+ if (mpegVer == AudioFrame.MPEG_V1)
+ {
+ switch (layer)
+ {
+ case AudioFrame.LAYER_1:
+ arr = BIT_RATE_MPEG1_L1;
+ break;
+ case AudioFrame.LAYER_2:
+ arr = BIT_RATE_MPEG1_L2;
+ break;
+ case AudioFrame.LAYER_3:
+ arr = BIT_RATE_MPEG1_L3;
+ break;
+ }
+ }
+ else
+ {
+ if (layer == AudioFrame.LAYER_1)
+ {
+ arr = BIT_RATE_MPEG2_L1;
+ }
+ else
+ {
+ arr = BIT_RATE_MPEG2_L2;
+ }
+ }
+ return arr[code];
+ }
+
+ /**
+ * Calculates the sample rate based on the given parameters. This is a
+ * plain lookup in the {@code SAMPLE_RATE} table; the arguments are not
+ * validated, so the caller has to pass a version code for which a table
+ * row exists and a code within that row (0-2).
+ *
+ * @param mpegVer the MPEG version
+ * @param code the code for the sample rate
+ * @return the sample rate in samples per second
+ */
+ private static int calculateSampleRate(int mpegVer, int code)
+ {
+ return SAMPLE_RATE[mpegVer][code];
+ }
+
+ /**
+ * Calculates the length of an MPEG frame based on the given parameters.
+ * Layer 1 frames are computed in units of 4 bytes as
+ * {@code (12 * bitRate / sampleRate + padding) * 4}; all other layers use
+ * {@code 144 * bitRate / sampleRate + padding}. Integer arithmetic is
+ * used, so the quotient is truncated.
+ * <p>
+ * NOTE(review): the MPEG version is not taken into account here. MPEG
+ * V2/V2.5 layer 3 frames are usually computed with a factor of 72 rather
+ * than 144 - confirm against the MPEG audio frame header specification.
+ * </p>
+ *
+ * @param layer the layer
+ * @param bitRate the bit rate
+ * @param sampleRate the sample rate
+ * @param padding the padding flag
+ * @return the length of the frame in bytes
+ */
+ private static int calculateFrameLength(int layer, int bitRate,
+ int sampleRate, int padding)
+ {
+ if (layer == AudioFrame.LAYER_1)
+ {
+ return (12 * bitRate / sampleRate + padding) * 4;
+ }
+ else
+ {
+ return 144 * bitRate / sampleRate + padding;
+ }
+ }
+
+ /**
+ * Calculates the duration of a MPEG frame based on the given parameters.
+ * The duration is the number of samples in a frame (384 for layer 1, 1152
+ * for the other layers) multiplied by the time of a single sample.
+ * <p>
+ * NOTE(review): the MPEG version is not taken into account here. MPEG
+ * V2/V2.5 layer 3 frames usually carry 576 samples - confirm against the
+ * MPEG audio frame header specification.
+ * </p>
+ *
+ * @param layer the layer
+ * @param sampleRate the sample rate
+ * @return the duration of this frame in milliseconds
+ */
+ private static float calculateDuration(int layer, int sampleRate)
+ {
+ int sampleCount =
+ (layer == AudioFrame.LAYER_1) ? SAMPLE_COUNT_L1
+ : SAMPLE_COUNT_L2;
+ return (1000.0f / sampleRate) * sampleCount;
+ }
+
+ /**
+ * Calculates the number of channels based on the given parameters. Codes
+ * less than 3 are mapped to 2 channels, all other codes to 1 channel.
+ *
+ * @param chan the code for the channels
+ * @return the number of channels (1 or 2)
+ */
+ private static int calculateChannels(int chan)
+ {
+ return chan < 3 ? 2 : 1;
+ }
+
+ /**
+ * Creates the complete array for the sample rate mapping. The result has 4
+ * rows which are indexed by the MPEG version code; only the rows for the
+ * version constants {@code MPEG_V1}, {@code MPEG_V2} and
+ * {@code MPEG_V2_5} are populated, any other row stays <b>null</b>.
+ *
+ * @return the table for the sample rates
+ */
+ private static int[][] createSampleRateTable()
+ {
+ int[][] arr = new int[4][];
+ arr[AudioFrame.MPEG_V1] = SAMPLE_RATE_MPEG1;
+ arr[AudioFrame.MPEG_V2] = SAMPLE_RATE_MPEG2;
+ arr[AudioFrame.MPEG_V2_5] = SAMPLE_RATE_MPEG2_5;
+ return arr;
+ }
+
+ /**
+ * A class representing the bit field of an MPEG header. It allows
+ * convenient access to specific bit groups. Bytes are collected in a
+ * single int; bit index 0 denotes the least significant bit of the byte
+ * added last.
+ */
+ private static class HeaderBitField
+ {
+ /** The internal value. */
+ private int value;
+
+ /**
+ * Adds a byte to this field. The bytes added so far are shifted to the
+ * left by 8 bits, and the new byte becomes the least significant one.
+ *
+ * @param b the byte to be added; it is combined without masking, so a
+ * value in the range 0-255 is expected
+ */
+ public void add(int b)
+ {
+ value <<= 8;
+ value |= b;
+ }
+
+ /**
+ * Returns the value of the bit group from the given start and end
+ * index (both inclusive). Bit 0 is the least significant bit, so e.g.
+ * {@code from} = 0, {@code to} = 3 will return the value of the lowest
+ * 4 bits.
+ *
+ * @param from the from index
+ * @param to the to index
+ * @return the value of this group of bits
+ */
+ public int get(int from, int to)
+ {
+ int shiftVal = value >> from;
+ int mask = (1 << (to - from + 1)) - 1;
+ return shiftVal & mask;
+ }
+
+ /**
+ * Returns the value of the bit with the given index. The bit index is
+ * 0-based. Result is either 0 or 1, depending on the value of this bit.
+ *
+ * @param bit the bit index
+ * @return the value of this bit
+ */
+ public int get(int bit)
+ {
+ return get(bit, bit);
+ }
+
+ /**
+ * Returns the internal value of this field as an array. The array
+ * contains 3 bytes, the most significant byte (bits 16-23) first.
+ *
+ * @return the internal value of this field as byte array
+ */
+ public byte[] toArray()
+ {
+ byte[] result = new byte[3];
+ result[0] = (byte) get(16, 23);
+ result[1] = (byte) get(8, 15);
+ result[2] = (byte) get(0, 7);
+ return result;
+ }
+ }
+}
Modified: tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/mp3/Mp3ParserTest.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/mp3/Mp3ParserTest.java?rev=1442402&r1=1442401&r2=1442402&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/mp3/Mp3ParserTest.java (original)
+++ tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/mp3/Mp3ParserTest.java Mon Feb 4 22:24:01 2013
@@ -36,6 +36,16 @@ import org.xml.sax.ContentHandler;
public class Mp3ParserTest extends TestCase {
/**
+ * Checks the duration of an MP3 file. The value stored under
+ * {@code XMPDM.DURATION} is parsed as a float, divided by 1000 and
+ * rounded before it is compared, i.e. it is treated as milliseconds.
+ * @param metadata the metadata object
+ * @param expected the expected duration, rounded as seconds
+ */
+ private static void checkDuration(Metadata metadata, int expected) {
+ assertEquals("Wrong duration", expected,
+ Math.round(Float.valueOf(metadata.get(XMPDM.DURATION)) / 1000));
+ }
+
+ /**
* Test that with only ID3v1 tags, we get some information out
*/
public void testMp3ParsingID3v1() throws Exception {
@@ -67,6 +77,7 @@ public class Mp3ParserTest extends TestC
assertEquals("MPEG 3 Layer III Version 1", metadata.get("version"));
assertEquals("44100", metadata.get("samplerate"));
assertEquals("1", metadata.get("channels"));
+ checkDuration(metadata, 2);
}
/**
@@ -118,6 +129,7 @@ public class Mp3ParserTest extends TestC
assertEquals("44100", metadata.get(XMPDM.AUDIO_SAMPLE_RATE));
assertEquals("Mono", metadata.get(XMPDM.AUDIO_CHANNEL_TYPE));
assertEquals("MP3", metadata.get(XMPDM.AUDIO_COMPRESSOR));
+ checkDuration(metadata, 2);
}
/**
@@ -153,6 +165,7 @@ public class Mp3ParserTest extends TestC
assertEquals("MPEG 3 Layer III Version 1", metadata.get("version"));
assertEquals("44100", metadata.get("samplerate"));
assertEquals("1", metadata.get("channels"));
+ checkDuration(metadata, 2);
}
/**
@@ -188,6 +201,7 @@ public class Mp3ParserTest extends TestC
assertEquals("MPEG 3 Layer III Version 1", metadata.get("version"));
assertEquals("44100", metadata.get("samplerate"));
assertEquals("1", metadata.get("channels"));
+ checkDuration(metadata, 2);
}
/**
@@ -222,6 +236,7 @@ public class Mp3ParserTest extends TestC
assertEquals("MPEG 3 Layer III Version 1", metadata.get("version"));
assertEquals("44100", metadata.get("samplerate"));
assertEquals("1", metadata.get("channels"));
+ checkDuration(metadata, 2);
}
@@ -262,6 +277,7 @@ public class Mp3ParserTest extends TestC
assertEquals("MPEG 3 Layer III Version 1", metadata.get("version"));
assertEquals("44100", metadata.get("samplerate"));
assertEquals("2", metadata.get("channels"));
+ checkDuration(metadata, 1);
}
public void testID3v2Frame() throws Exception {
Added: tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/mp3/MpegStreamTest.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/mp3/MpegStreamTest.java?rev=1442402&view=auto
==============================================================================
--- tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/mp3/MpegStreamTest.java (added)
+++ tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/mp3/MpegStreamTest.java Mon Feb 4 22:24:01 2013
@@ -0,0 +1,165 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.mp3;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertNull;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.OutputStream;
+
+import org.junit.After;
+import org.junit.Test;
+
+/**
+ * Test class for {@code MpegStream}.
+ */
+public class MpegStreamTest
+{
+ /** The stream to be tested; if set, it is closed by {@code tearDown()} after each test. */
+ private MpegStream stream;
+
+ @After
+ public void tearDown() throws Exception
+ {
+ if (stream != null)
+ {
+ stream.close();
+ }
+ }
+
+ /**
+ * Tests whether the default test header can be found in a stream. A new
+ * {@code MpegStream} is created over the content of the passed in stream
+ * (and stored in the {@code stream} field). The first frame found must be
+ * an MPEG V2, layer 3 frame with a bit rate of 80000 and a sample rate of
+ * 24000.
+ *
+ * @param bos the stream
+ * @throws IOException if an error occurs
+ */
+ private void checkDefaultHeader(ByteArrayOutputStream bos)
+ throws IOException
+ {
+ ByteArrayInputStream in = new ByteArrayInputStream(bos.toByteArray());
+ stream = new MpegStream(in);
+ AudioFrame header = stream.nextFrame();
+ assertNotNull("No header found", header);
+ assertEquals("Wrong MPEG version", AudioFrame.MPEG_V2,
+ header.getVersionCode());
+ assertEquals("Wrong layer", AudioFrame.LAYER_3, header.getLayer());
+ assertEquals("Wrong bit rate", 80000, header.getBitRate());
+ assertEquals("Wrong sample rate", 24000, header.getSampleRate());
+ }
+
+ /**
+ * Writes the given byte the given number of times into an output stream.
+ * Nothing is written if the count is zero or negative.
+ *
+ * @param out the output stream
+ * @param value the value to write
+ * @param count the number of bytes to write
+ * @throws IOException if an error occurs
+ */
+ private static void writeBytes(OutputStream out, int value, int count)
+ throws IOException
+ {
+ for (int i = 0; i < count; i++)
+ {
+ out.write(value);
+ }
+ }
+
+ /**
+ * Writes a frame header in the given output stream. The header consists
+ * of the sync byte {@code 0xFF} followed by the three passed in bytes; no
+ * frame data is written.
+ *
+ * @param out the output stream
+ * @param b2 byte 2 of the header
+ * @param b3 byte 3 of the header
+ * @param b4 byte 4 of the header
+ * @throws IOException if an error occurs
+ */
+ private static void writeFrame(OutputStream out, int b2, int b3, int b4)
+ throws IOException
+ {
+ out.write(0xFF);
+ out.write(b2);
+ out.write(b3);
+ out.write(b4);
+ }
+
+ /**
+ * Tests whether an audio frame header can be found somewhere in a stream.
+ * The valid header is preceded by runs of {@code 0xFF} and zero bytes
+ * which must be skipped by the search, including sync bytes that are not
+ * followed by a valid header.
+ */
+ @Test
+ public void testSearchNextFrame() throws IOException
+ {
+ ByteArrayOutputStream bos = new ByteArrayOutputStream();
+ writeBytes(bos, 0xFF, 32);
+ writeBytes(bos, 0, 16);
+ writeBytes(bos, 0xFF, 8);
+ bos.write(0xF3);
+ bos.write(0x96);
+ bos.write(0);
+ checkDefaultHeader(bos);
+ }
+
+ /**
+ * Tests whether invalid frame headers are detected and skipped. The first
+ * five headers written are invalid because of (in this order) a version
+ * code of 1, a layer code of 0, a bit rate code of 0, a bit rate code of
+ * 15, and a sample rate code of 3. Only the sixth header is valid and
+ * must be returned.
+ */
+ @Test
+ public void testSearchNextFrameInvalid() throws IOException
+ {
+ ByteArrayOutputStream bos = new ByteArrayOutputStream();
+ writeFrame(bos, 0xEB, 0x96, 0);
+ writeFrame(bos, 0xF9, 0x96, 0);
+ writeFrame(bos, 0xF3, 0, 0);
+ writeFrame(bos, 0xF3, 0xF0, 0);
+ writeFrame(bos, 0xF3, 0x7C, 0);
+ writeFrame(bos, 0xF3, 0x96, 0);
+ checkDefaultHeader(bos);
+ }
+
+ /**
+ * Tests a search for another frame which is interrupted because the stream
+ * ends. The stream contains the start of a valid header, but ends before
+ * its last byte, so no frame must be returned.
+ */
+ @Test
+ public void testSeachNextFrameEOS() throws IOException
+ {
+ ByteArrayOutputStream bos = new ByteArrayOutputStream();
+ bos.write(0xFF);
+ bos.write(0xFF);
+ bos.write(0xF3);
+ bos.write(0x96);
+ ByteArrayInputStream in = new ByteArrayInputStream(bos.toByteArray());
+ stream = new MpegStream(in);
+ assertNull("Got a frame", stream.nextFrame());
+ }
+
+ /**
+ * Tries to skip a frame if no current header is available.
+ * {@code skipFrame()} is called without a preceding {@code nextFrame()}
+ * and is expected to return <b>false</b>.
+ */
+ @Test
+ public void testSkipNoCurrentHeader() throws IOException
+ {
+ ByteArrayOutputStream bos = new ByteArrayOutputStream();
+ bos.write("This is a test".getBytes());
+ ByteArrayInputStream in = new ByteArrayInputStream(bos.toByteArray());
+ stream = new MpegStream(in);
+ assertFalse("Wrong result", stream.skipFrame());
+ }
+}