You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2018/09/19 16:51:38 UTC
[tika] branch master updated: TIKA-2730 -- allow last frame to be
truncated w/o throwing an EOF
This is an automated email from the ASF dual-hosted git repository.
tallison pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/tika.git
The following commit(s) were added to refs/heads/master by this push:
new e4d6d15 TIKA-2730 -- allow last frame to be truncated w/o throwing an EOF
e4d6d15 is described below
commit e4d6d15f705232b0a422c01e217919e53b118fdf
Author: TALLISON <ta...@apache.org>
AuthorDate: Wed Sep 19 12:51:23 2018 -0400
TIKA-2730 -- allow last frame to be truncated w/o throwing an EOF
---
.../java/org/apache/tika/parser/mp3/Mp3Parser.java | 9 ++++--
.../org/apache/tika/parser/mp3/MpegStream.java | 11 ++++---
.../org/apache/tika/parser/mp3/Mp3ParserTest.java | 33 ++++++++++++++++++++-
.../test-documents/testMP3i18n_truncated.mp3 | Bin 0 -> 40672 bytes
4 files changed, 43 insertions(+), 10 deletions(-)
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/mp3/Mp3Parser.java b/tika-parsers/src/main/java/org/apache/tika/parser/mp3/Mp3Parser.java
index 3b79f31..345f486 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/mp3/Mp3Parser.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/mp3/Mp3Parser.java
@@ -194,15 +194,18 @@ public class Mp3Parser extends AbstractParser {
// Now iterate over all audio frames in the file
AudioFrame frame = mpegStream.nextFrame();
float duration = 0;
- while (frame != null)
+ boolean skipped = true;
+ while (frame != null && skipped)
{
duration += frame.getDuration();
if (firstAudio == null)
{
firstAudio = frame;
}
- mpegStream.skipFrame();
- frame = mpegStream.nextFrame();
+ skipped = mpegStream.skipFrame();
+ if (skipped) {
+ frame = mpegStream.nextFrame();
+ }
}
// ID3v1 tags live at the end of the file
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/mp3/MpegStream.java b/tika-parsers/src/main/java/org/apache/tika/parser/mp3/MpegStream.java
index 1814c12..4984fea 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/mp3/MpegStream.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/mp3/MpegStream.java
@@ -150,10 +150,10 @@ class MpegStream extends PushbackInputStream
* Skips the current MPEG frame. This method can be called after a valid
* MPEG header has been retrieved using {@code nextFrame()}. In this case
* the underlying stream is advanced to the end of the associated MPEG
- * frame. Otherwise, this method has no effect. The return value indicates
- * whether a frame could be skipped.
+ * frame or until the EOF is reached. The return value indicates
+ * whether the full frame could be skipped.
*
- * @return <b>true</b> if a frame could be skipped, <b>false</b> otherwise
+ * @return <b>true</b> if the full frame could be skipped, <b>false</b> otherwise (for example, when EOF is reached before the end of the frame)
* @throws IOException if an IO error occurs
*/
public boolean skipFrame() throws IOException
@@ -162,11 +162,10 @@ class MpegStream extends PushbackInputStream
{
long toSkip = currentHeader.getLength() - HEADER_SIZE;
long skipped = IOUtils.skipFully(in, toSkip);
+ currentHeader = null;
if (skipped < toSkip) {
- throw new EOFException("EOF: tried to skip "+toSkip +
- " but could only skip "+skipped);
+ return false;
}
- currentHeader = null;
return true;
}
return false;
diff --git a/tika-parsers/src/test/java/org/apache/tika/parser/mp3/Mp3ParserTest.java b/tika-parsers/src/test/java/org/apache/tika/parser/mp3/Mp3ParserTest.java
index ae9d06b..951e46d 100644
--- a/tika-parsers/src/test/java/org/apache/tika/parser/mp3/Mp3ParserTest.java
+++ b/tika-parsers/src/test/java/org/apache/tika/parser/mp3/Mp3ParserTest.java
@@ -243,7 +243,38 @@ public class Mp3ParserTest {
assertEquals("1", metadata.get("channels"));
checkDuration(metadata, 2);
}
-
+ /**
+ * Tests that a file with the last frame slightly
+ * truncated does not cause an EOFException and does
+ * not lead to an infinite loop.
+ */
+ @Test
+ public void testMp3ParsingID3i18nTruncated() throws Exception {
+ Parser parser = new AutoDetectParser(); // Should auto-detect!
+ ContentHandler handler = new BodyContentHandler();
+ Metadata metadata = new Metadata();
+
+ try (InputStream stream = Mp3ParserTest.class.getResourceAsStream(
+ "/test-documents/testMP3i18n_truncated.mp3")) {
+ parser.parse(stream, handler, metadata, new ParseContext());
+ }
+
+ assertEquals("audio/mpeg", metadata.get(Metadata.CONTENT_TYPE));
+ assertEquals("Une chason en Fran\u00e7ais", metadata.get(TikaCoreProperties.TITLE));
+ assertEquals("Test Artist \u2468\u2460", metadata.get(TikaCoreProperties.CREATOR));
+ assertEquals("Test Artist \u2468\u2460", metadata.get(XMPDM.ARTIST));
+ assertEquals("Test Album \u2460\u2468", metadata.get(XMPDM.ALBUM));
+
+ assertEquals(
+ "Eng - Comment Desc\nThis is a \u1357\u2468\u2460 Comment",
+ metadata.get(XMPDM.LOG_COMMENT)
+ );
+
+ assertEquals("MPEG 3 Layer III Version 1", metadata.get("version"));
+ assertEquals("44100", metadata.get("samplerate"));
+ assertEquals("1", metadata.get("channels"));
+ checkDuration(metadata, 2);
+ }
/**
* Tests that a file with both lyrics and
diff --git a/tika-parsers/src/test/resources/test-documents/testMP3i18n_truncated.mp3 b/tika-parsers/src/test/resources/test-documents/testMP3i18n_truncated.mp3
new file mode 100644
index 0000000..c2cd30d
Binary files /dev/null and b/tika-parsers/src/test/resources/test-documents/testMP3i18n_truncated.mp3 differ