You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2016/06/29 11:11:35 UTC

[29/39] tika git commit: Convert new lines from windows to unix

http://git-wip-us.apache.org/repos/asf/tika/blob/c7a6bcac/tika-parser-modules/tika-parser-multimedia-module/src/test/java/org/apache/tika/parser/mp3/Mp3ParserTest.java
----------------------------------------------------------------------
diff --git a/tika-parser-modules/tika-parser-multimedia-module/src/test/java/org/apache/tika/parser/mp3/Mp3ParserTest.java b/tika-parser-modules/tika-parser-multimedia-module/src/test/java/org/apache/tika/parser/mp3/Mp3ParserTest.java
index 4105dfa..1c615f6 100644
--- a/tika-parser-modules/tika-parser-multimedia-module/src/test/java/org/apache/tika/parser/mp3/Mp3ParserTest.java
+++ b/tika-parser-modules/tika-parser-multimedia-module/src/test/java/org/apache/tika/parser/mp3/Mp3ParserTest.java
@@ -1,414 +1,414 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.parser.mp3;
-
-import static org.apache.tika.TikaTest.assertContains;
-import static org.junit.Assert.assertEquals;
-
-import java.io.ByteArrayInputStream;
-import java.io.InputStream;
-
-import org.apache.tika.metadata.Metadata;
-import org.apache.tika.metadata.TikaCoreProperties;
-import org.apache.tika.metadata.XMPDM;
-import org.apache.tika.parser.AutoDetectParser;
-import org.apache.tika.parser.ParseContext;
-import org.apache.tika.parser.Parser;
-import org.apache.tika.sax.BodyContentHandler;
-import org.junit.Test;
-import org.xml.sax.ContentHandler;
-
-/**
- * Test case for parsing mp3 files.
- */
-public class Mp3ParserTest {
-
-    /**
-     * Checks the duration of an MP3 file.
-     * @param metadata the metadata object
-     * @param expected the expected duration, rounded as seconds
-     */
-    private static void checkDuration(Metadata metadata, int expected) {
-        assertEquals("Wrong duration", expected,
-                Math.round(Float.valueOf(metadata.get(XMPDM.DURATION)) / 1000));
-    }
-
-    /**
-     * Test that with only ID3v1 tags, we get some information out   
-     */
-    @Test
-    public void testMp3ParsingID3v1() throws Exception {
-        Parser parser = new AutoDetectParser(); // Should auto-detect!
-        ContentHandler handler = new BodyContentHandler();
-        Metadata metadata = new Metadata();
-
-        try (InputStream stream = Mp3ParserTest.class.getResourceAsStream(
-                "/test-documents/testMP3id3v1.mp3")) {
-            parser.parse(stream, handler, metadata, new ParseContext());
-        }
-
-        assertEquals("audio/mpeg", metadata.get(Metadata.CONTENT_TYPE));
-        assertEquals("Test Title", metadata.get(TikaCoreProperties.TITLE));
-        assertEquals("Test Artist", metadata.get(TikaCoreProperties.CREATOR));
-        assertEquals("Test Artist", metadata.get(Metadata.AUTHOR));
-
-        String content = handler.toString();
-        assertContains("Test Title", content);
-        assertContains("Test Artist", content);
-        assertContains("Test Album", content);
-        assertContains("2008", content);
-        assertContains("Test Comment", content);
-        assertContains("Rock", content);
-        
-        assertEquals("MPEG 3 Layer III Version 1", metadata.get("version"));
-        assertEquals("44100", metadata.get("samplerate"));
-        assertEquals("1", metadata.get("channels"));
-        checkDuration(metadata, 2);
-    }
-
-    /**
-     * Test that with only ID3v2 tags, we get the full
-     *  set of information out.
-     */
-    @Test
-    public void testMp3ParsingID3v2() throws Exception {
-        Parser parser = new AutoDetectParser(); // Should auto-detect!
-        ContentHandler handler = new BodyContentHandler();
-        Metadata metadata = new Metadata();
-
-        try (InputStream stream = Mp3ParserTest.class.getResourceAsStream(
-                "/test-documents/testMP3id3v2.mp3")) {
-            parser.parse(stream, handler, metadata, new ParseContext());
-        }
-
-        // Check core properties
-        assertEquals("audio/mpeg", metadata.get(Metadata.CONTENT_TYPE));
-        assertEquals("Test Title", metadata.get(TikaCoreProperties.TITLE));
-        assertEquals("Test Artist", metadata.get(TikaCoreProperties.CREATOR));
-        assertEquals("Test Artist", metadata.get(Metadata.AUTHOR));
-
-        // Check the textual contents
-        String content = handler.toString();
-        assertContains("Test Title", content);
-        assertContains("Test Artist", content);
-        assertContains("Test Album", content);
-        assertContains("2008", content);
-        assertContains("Test Comment", content);
-        assertContains("Rock", content);
-        assertContains(", track 1", content);
-        assertContains(", disc 1", content);
-        
-        // Check un-typed audio properties
-        assertEquals("MPEG 3 Layer III Version 1", metadata.get("version"));
-        assertEquals("44100", metadata.get("samplerate"));
-        assertEquals("1", metadata.get("channels"));
-        
-        // Check XMPDM-typed audio properties
-        assertEquals("Test Album", metadata.get(XMPDM.ALBUM));
-        assertEquals("Test Artist", metadata.get(XMPDM.ARTIST));
-        assertEquals("Test Album Artist", metadata.get(XMPDM.ALBUM_ARTIST));
-        assertEquals(null, metadata.get(XMPDM.COMPOSER));
-        assertEquals("2008", metadata.get(XMPDM.RELEASE_DATE));
-        assertEquals("Rock", metadata.get(XMPDM.GENRE));
-        assertEquals("XXX - ID3v1 Comment\nTest Comment", metadata.get(XMPDM.LOG_COMMENT.getName()));
-        assertEquals("1", metadata.get(XMPDM.TRACK_NUMBER));
-        assertEquals("1/1", metadata.get(XMPDM.DISC_NUMBER));
-        assertEquals("1", metadata.get(XMPDM.COMPILATION));
-        
-        assertEquals("44100", metadata.get(XMPDM.AUDIO_SAMPLE_RATE));
-        assertEquals("Mono", metadata.get(XMPDM.AUDIO_CHANNEL_TYPE));
-        assertEquals("MP3", metadata.get(XMPDM.AUDIO_COMPRESSOR));
-        checkDuration(metadata, 2);
-    }
-
-    /**
-     * Test that with both id3v2 and id3v1, we prefer the
-     *  details from id3v2
-     */
-    @Test
-    public void testMp3ParsingID3v1v2() throws Exception {
-        Parser parser = new AutoDetectParser(); // Should auto-detect!
-        ContentHandler handler = new BodyContentHandler();
-        Metadata metadata = new Metadata();
-
-        try (InputStream stream = Mp3ParserTest.class.getResourceAsStream(
-                "/test-documents/testMP3id3v1_v2.mp3")) {
-            parser.parse(stream, handler, metadata, new ParseContext());
-        }
-
-        assertEquals("audio/mpeg", metadata.get(Metadata.CONTENT_TYPE));
-        assertEquals("Test Title", metadata.get(TikaCoreProperties.TITLE));
-        assertEquals("Test Artist", metadata.get(TikaCoreProperties.CREATOR));
-        assertEquals("Test Artist", metadata.get(Metadata.AUTHOR));
-
-        String content = handler.toString();
-        assertContains("Test Title", content);
-        assertContains("Test Artist", content);
-        assertContains("Test Album", content);
-        assertContains("2008", content);
-        assertContains("Test Comment", content);
-        assertContains("Rock", content);
-        
-        assertEquals("MPEG 3 Layer III Version 1", metadata.get("version"));
-        assertEquals("44100", metadata.get("samplerate"));
-        assertEquals("1", metadata.get("channels"));
-        checkDuration(metadata, 2);
-    }
-
-    /**
-     * Test that with only ID3v2 tags, of version 2.4, we get the full
-     *  set of information out.
-     */
-    @Test
-    public void testMp3ParsingID3v24() throws Exception {
-        Parser parser = new AutoDetectParser(); // Should auto-detect!
-        ContentHandler handler = new BodyContentHandler();
-        Metadata metadata = new Metadata();
-
-        try (InputStream stream = Mp3ParserTest.class.getResourceAsStream(
-                "/test-documents/testMP3id3v24.mp3")) {
-            parser.parse(stream, handler, metadata, new ParseContext());
-        }
-
-        assertEquals("audio/mpeg", metadata.get(Metadata.CONTENT_TYPE));
-        assertEquals("Test Title", metadata.get(TikaCoreProperties.TITLE));
-        assertEquals("Test Artist", metadata.get(TikaCoreProperties.CREATOR));
-        assertEquals("Test Artist", metadata.get(Metadata.AUTHOR));
-
-        String content = handler.toString();
-        assertContains("Test Title", content);
-        assertContains("Test Artist", content);
-        assertContains("Test Album", content);
-        assertContains("2008", content);
-        assertContains("Test Comment", content);
-        assertContains("Rock", content);
-        assertContains(", disc 1", content);
-        
-        assertEquals("MPEG 3 Layer III Version 1", metadata.get("version"));
-        assertEquals("44100", metadata.get("samplerate"));
-        assertEquals("1", metadata.get("channels"));
-        checkDuration(metadata, 2);
-
-        // Check XMPDM-typed audio properties
-        assertEquals("Test Album", metadata.get(XMPDM.ALBUM));
-        assertEquals("Test Artist", metadata.get(XMPDM.ARTIST));
-        assertEquals("Test Album Artist", metadata.get(XMPDM.ALBUM_ARTIST));
-        assertEquals(null, metadata.get(XMPDM.COMPOSER));
-        assertEquals("2008", metadata.get(XMPDM.RELEASE_DATE));
-        assertEquals("Rock", metadata.get(XMPDM.GENRE));
-        assertEquals("1", metadata.get(XMPDM.COMPILATION));
-        
-        assertEquals(null, metadata.get(XMPDM.TRACK_NUMBER));
-        assertEquals("1", metadata.get(XMPDM.DISC_NUMBER));
-    }
-    
-    /**
-     * Tests that a file with characters not in the ISO 8859-1
-     *  range is correctly handled
-     */
-    @Test
-    public void testMp3ParsingID3i18n() throws Exception {
-       Parser parser = new AutoDetectParser(); // Should auto-detect!
-       ContentHandler handler = new BodyContentHandler();
-       Metadata metadata = new Metadata();
-
-        try (InputStream stream = Mp3ParserTest.class.getResourceAsStream(
-                "/test-documents/testMP3i18n.mp3")) {
-            parser.parse(stream, handler, metadata, new ParseContext());
-        }
-
-       assertEquals("audio/mpeg", metadata.get(Metadata.CONTENT_TYPE));
-       assertEquals("Une chason en Fran\u00e7ais", metadata.get(TikaCoreProperties.TITLE));
-       assertEquals("Test Artist \u2468\u2460", metadata.get(TikaCoreProperties.CREATOR));
-       assertEquals("Test Artist \u2468\u2460", metadata.get(Metadata.AUTHOR));
-       assertEquals("Test Artist \u2468\u2460", metadata.get(XMPDM.ARTIST));
-       assertEquals("Test Album \u2460\u2468", metadata.get(XMPDM.ALBUM));
-
-       assertEquals(
-             "Eng - Comment Desc\nThis is a \u1357\u2468\u2460 Comment", 
-             metadata.get(XMPDM.LOG_COMMENT)
-       );
-       
-       assertEquals("MPEG 3 Layer III Version 1", metadata.get("version"));
-       assertEquals("44100", metadata.get("samplerate"));
-       assertEquals("1", metadata.get("channels"));
-       checkDuration(metadata, 2);
-   }
-    
-    
-    /**
-     * Tests that a file with both lyrics and
-     *  ID3v2 tags gets both extracted correctly
-     */
-    @Test
-    public void testMp3ParsingLyrics() throws Exception {
-        Parser parser = new AutoDetectParser(); // Should auto-detect!
-        ContentHandler handler = new BodyContentHandler();
-        Metadata metadata = new Metadata();
-
-        // Note - our test file has a lyrics tag, but lacks any
-        //  lyrics in the tags, so we can't test that bit
-        // TODO Find a better sample file
-
-        try (InputStream stream = Mp3ParserTest.class.getResourceAsStream(
-                "/test-documents/testMP3lyrics.mp3")) {
-            parser.parse(stream, handler, metadata, new ParseContext());
-        }
-
-        assertEquals("audio/mpeg", metadata.get(Metadata.CONTENT_TYPE));
-        assertEquals("Test Title", metadata.get(TikaCoreProperties.TITLE));
-        assertEquals("Test Artist", metadata.get(TikaCoreProperties.CREATOR));
-        assertEquals("Test Artist", metadata.get(Metadata.AUTHOR));
-
-        String content = handler.toString();
-        assertContains("Test Title", content);
-        assertContains("Test Artist", content);
-        assertContains("Test Album", content);
-        assertContains("2008", content);
-        assertContains("Test Comment", content);
-        assertContains("Rock", content);
-        
-        assertEquals("MPEG 3 Layer III Version 1", metadata.get("version"));
-        assertEquals("44100", metadata.get("samplerate"));
-        assertEquals("2", metadata.get("channels"));
-        checkDuration(metadata, 1);
-    }
-    
-    @Test
-    public void testID3v2Frame() throws Exception {
-       byte[] empty = new byte[] {
-             0x49, 0x44, 0x33, 3, 1, 0,
-             0, 0, 0, 0
-       };
-       
-       assertEquals(11, ID3v2Frame.getInt(new byte[] {0,0,0,0x0b}));
-       assertEquals(257, ID3v2Frame.getInt(new byte[] {0,0,1,1}));
-       
-       ID3v2Frame f = (ID3v2Frame)
-            ID3v2Frame.createFrameIfPresent(new ByteArrayInputStream(empty));
-       assertEquals(3, f.getMajorVersion());
-       assertEquals(1, f.getMinorVersion());
-       assertEquals(0, f.getFlags());
-       assertEquals(0, f.getLength());
-       assertEquals(0, f.getData().length);
-       
-       assertEquals("", ID3v2Frame.getTagString(f.getData(), 0, 0));
-       assertEquals("", ID3v2Frame.getTagString(new byte[] {0,0,0,0}, 0, 3));
-       assertEquals("A", ID3v2Frame.getTagString(new byte[] {(byte)'A',0,0,0}, 0, 3));
-    }
-
-    @Test
-    public void testTIKA1589_noId3ReturnsDurationCorrectly() throws Exception {
-        Parser parser = new AutoDetectParser(); // Should auto-detect!
-        ContentHandler handler = new BodyContentHandler();
-        Metadata metadata = new Metadata();
-
-        try (InputStream stream = Mp3ParserTest.class.getResourceAsStream(
-                "/test-documents/testMP3noid3.mp3")) {
-            parser.parse(stream, handler, metadata, new ParseContext());
-        }
-
-        assertEquals("2455.510986328125", metadata.get(XMPDM.DURATION));
-    }
-    
-    /**
-     * This test will do nothing, unless you've downloaded the
-     *  mp3 file from TIKA-424 - the file cannot be
-     *  distributed with Tika.
-     * This test will check for the complicated set of ID3v2.4
-     *  tags.
-     */
-    @Test
-    public void testTIKA424() throws Exception {
-       Parser parser = new AutoDetectParser(); // Should auto-detect!
-       ContentHandler handler = new BodyContentHandler();
-       Metadata metadata = new Metadata();
-
-        try (InputStream stream = Mp3ParserTest.class.getResourceAsStream(
-                "/test-documents/test2.mp3")) {
-            if (stream == null) {
-                // You haven't downloaded the file
-                // Skip the test
-                return;
-            }
-            parser.parse(stream, handler, metadata, new ParseContext());
-        }
-
-       assertEquals("audio/mpeg", metadata.get(Metadata.CONTENT_TYPE));
-       assertEquals("Plus loin vers l'ouest", metadata.get(TikaCoreProperties.TITLE));
-       assertEquals("Merzhin", metadata.get(TikaCoreProperties.CREATOR));
-       assertEquals("Merzhin", metadata.get(Metadata.AUTHOR));
-
-       String content = handler.toString();
-       assertContains("Plus loin vers l'ouest", content);
-       
-       assertEquals("MPEG 3 Layer III Version 1", metadata.get("version"));
-       assertEquals("44100", metadata.get("samplerate"));
-       assertEquals("2", metadata.get("channels"));
-    }
-    
-    /**
-     * This tests that we can handle without errors (but perhaps not
-     *  all content) a file with a very very large ID3 frame that
-     *  has been truncated before the end of the ID3 tags.
-     * In this case, it is a file with JPEG data in the ID3, which
-     *  is trunacted before the end of the JPEG bit of the ID3 frame.
-     */
-    @Test
-    public void testTIKA474() throws Exception {
-       Parser parser = new AutoDetectParser(); // Should auto-detect!
-       ContentHandler handler = new BodyContentHandler();
-       Metadata metadata = new Metadata();
-
-        try (InputStream stream = Mp3ParserTest.class.getResourceAsStream(
-                "/test-documents/testMP3truncated.mp3")) {
-            parser.parse(stream, handler, metadata, new ParseContext());
-        }
-
-       // Check we could get the headers from the start
-       assertEquals("audio/mpeg", metadata.get(Metadata.CONTENT_TYPE));
-       assertEquals("Girl you have no faith in medicine", metadata.get(TikaCoreProperties.TITLE));
-       assertEquals("The White Stripes", metadata.get(TikaCoreProperties.CREATOR));
-       assertEquals("The White Stripes", metadata.get(Metadata.AUTHOR));
-
-       String content = handler.toString();
-       assertContains("Girl you have no faith in medicine", content);
-       assertContains("The White Stripes", content);
-       assertContains("Elephant", content);
-       assertContains("2003", content);
-       
-       // File lacks any audio frames, so we can't know these
-       assertEquals(null, metadata.get("version"));
-       assertEquals(null, metadata.get("samplerate"));
-       assertEquals(null, metadata.get("channels"));
-    }
-
-    // TIKA-1024
-    @Test
-    public void testNakedUTF16BOM() throws Exception {
-       Parser parser = new AutoDetectParser(); // Should auto-detect!
-       ContentHandler handler = new BodyContentHandler();
-       Metadata metadata = new Metadata();
-
-        try (InputStream stream = Mp3ParserTest.class.getResourceAsStream(
-                "/test-documents/testNakedUTF16BOM.mp3")) {
-            parser.parse(stream, handler, metadata, new ParseContext());
-        }
-       assertEquals("audio/mpeg", metadata.get(Metadata.CONTENT_TYPE));
-       assertEquals("", metadata.get(XMPDM.GENRE));
-    }
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.mp3;
+
+import static org.apache.tika.TikaTest.assertContains;
+import static org.junit.Assert.assertEquals;
+
+import java.io.ByteArrayInputStream;
+import java.io.InputStream;
+
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.TikaCoreProperties;
+import org.apache.tika.metadata.XMPDM;
+import org.apache.tika.parser.AutoDetectParser;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.parser.Parser;
+import org.apache.tika.sax.BodyContentHandler;
+import org.junit.Test;
+import org.xml.sax.ContentHandler;
+
+/**
+ * Test case for parsing mp3 files.
+ */
+public class Mp3ParserTest {
+
+    /**
+     * Checks the duration of an MP3 file.
+     * @param metadata the metadata object
+     * @param expected the expected duration, rounded as seconds
+     */
+    private static void checkDuration(Metadata metadata, int expected) {
+        assertEquals("Wrong duration", expected,
+                Math.round(Float.valueOf(metadata.get(XMPDM.DURATION)) / 1000));
+    }
+
+    /**
+     * Test that with only ID3v1 tags, we get some information out   
+     */
+    @Test
+    public void testMp3ParsingID3v1() throws Exception {
+        Parser parser = new AutoDetectParser(); // Should auto-detect!
+        ContentHandler handler = new BodyContentHandler();
+        Metadata metadata = new Metadata();
+
+        try (InputStream stream = Mp3ParserTest.class.getResourceAsStream(
+                "/test-documents/testMP3id3v1.mp3")) {
+            parser.parse(stream, handler, metadata, new ParseContext());
+        }
+
+        assertEquals("audio/mpeg", metadata.get(Metadata.CONTENT_TYPE));
+        assertEquals("Test Title", metadata.get(TikaCoreProperties.TITLE));
+        assertEquals("Test Artist", metadata.get(TikaCoreProperties.CREATOR));
+        assertEquals("Test Artist", metadata.get(Metadata.AUTHOR));
+
+        String content = handler.toString();
+        assertContains("Test Title", content);
+        assertContains("Test Artist", content);
+        assertContains("Test Album", content);
+        assertContains("2008", content);
+        assertContains("Test Comment", content);
+        assertContains("Rock", content);
+        
+        assertEquals("MPEG 3 Layer III Version 1", metadata.get("version"));
+        assertEquals("44100", metadata.get("samplerate"));
+        assertEquals("1", metadata.get("channels"));
+        checkDuration(metadata, 2);
+    }
+
+    /**
+     * Test that with only ID3v2 tags, we get the full
+     *  set of information out.
+     */
+    @Test
+    public void testMp3ParsingID3v2() throws Exception {
+        Parser parser = new AutoDetectParser(); // Should auto-detect!
+        ContentHandler handler = new BodyContentHandler();
+        Metadata metadata = new Metadata();
+
+        try (InputStream stream = Mp3ParserTest.class.getResourceAsStream(
+                "/test-documents/testMP3id3v2.mp3")) {
+            parser.parse(stream, handler, metadata, new ParseContext());
+        }
+
+        // Check core properties
+        assertEquals("audio/mpeg", metadata.get(Metadata.CONTENT_TYPE));
+        assertEquals("Test Title", metadata.get(TikaCoreProperties.TITLE));
+        assertEquals("Test Artist", metadata.get(TikaCoreProperties.CREATOR));
+        assertEquals("Test Artist", metadata.get(Metadata.AUTHOR));
+
+        // Check the textual contents
+        String content = handler.toString();
+        assertContains("Test Title", content);
+        assertContains("Test Artist", content);
+        assertContains("Test Album", content);
+        assertContains("2008", content);
+        assertContains("Test Comment", content);
+        assertContains("Rock", content);
+        assertContains(", track 1", content);
+        assertContains(", disc 1", content);
+        
+        // Check un-typed audio properties
+        assertEquals("MPEG 3 Layer III Version 1", metadata.get("version"));
+        assertEquals("44100", metadata.get("samplerate"));
+        assertEquals("1", metadata.get("channels"));
+        
+        // Check XMPDM-typed audio properties
+        assertEquals("Test Album", metadata.get(XMPDM.ALBUM));
+        assertEquals("Test Artist", metadata.get(XMPDM.ARTIST));
+        assertEquals("Test Album Artist", metadata.get(XMPDM.ALBUM_ARTIST));
+        assertEquals(null, metadata.get(XMPDM.COMPOSER));
+        assertEquals("2008", metadata.get(XMPDM.RELEASE_DATE));
+        assertEquals("Rock", metadata.get(XMPDM.GENRE));
+        assertEquals("XXX - ID3v1 Comment\nTest Comment", metadata.get(XMPDM.LOG_COMMENT.getName()));
+        assertEquals("1", metadata.get(XMPDM.TRACK_NUMBER));
+        assertEquals("1/1", metadata.get(XMPDM.DISC_NUMBER));
+        assertEquals("1", metadata.get(XMPDM.COMPILATION));
+        
+        assertEquals("44100", metadata.get(XMPDM.AUDIO_SAMPLE_RATE));
+        assertEquals("Mono", metadata.get(XMPDM.AUDIO_CHANNEL_TYPE));
+        assertEquals("MP3", metadata.get(XMPDM.AUDIO_COMPRESSOR));
+        checkDuration(metadata, 2);
+    }
+
+    /**
+     * Test that with both id3v2 and id3v1, we prefer the
+     *  details from id3v2
+     */
+    @Test
+    public void testMp3ParsingID3v1v2() throws Exception {
+        Parser parser = new AutoDetectParser(); // Should auto-detect!
+        ContentHandler handler = new BodyContentHandler();
+        Metadata metadata = new Metadata();
+
+        try (InputStream stream = Mp3ParserTest.class.getResourceAsStream(
+                "/test-documents/testMP3id3v1_v2.mp3")) {
+            parser.parse(stream, handler, metadata, new ParseContext());
+        }
+
+        assertEquals("audio/mpeg", metadata.get(Metadata.CONTENT_TYPE));
+        assertEquals("Test Title", metadata.get(TikaCoreProperties.TITLE));
+        assertEquals("Test Artist", metadata.get(TikaCoreProperties.CREATOR));
+        assertEquals("Test Artist", metadata.get(Metadata.AUTHOR));
+
+        String content = handler.toString();
+        assertContains("Test Title", content);
+        assertContains("Test Artist", content);
+        assertContains("Test Album", content);
+        assertContains("2008", content);
+        assertContains("Test Comment", content);
+        assertContains("Rock", content);
+        
+        assertEquals("MPEG 3 Layer III Version 1", metadata.get("version"));
+        assertEquals("44100", metadata.get("samplerate"));
+        assertEquals("1", metadata.get("channels"));
+        checkDuration(metadata, 2);
+    }
+
+    /**
+     * Test that with only ID3v2 tags, of version 2.4, we get the full
+     *  set of information out.
+     */
+    @Test
+    public void testMp3ParsingID3v24() throws Exception {
+        Parser parser = new AutoDetectParser(); // Should auto-detect!
+        ContentHandler handler = new BodyContentHandler();
+        Metadata metadata = new Metadata();
+
+        try (InputStream stream = Mp3ParserTest.class.getResourceAsStream(
+                "/test-documents/testMP3id3v24.mp3")) {
+            parser.parse(stream, handler, metadata, new ParseContext());
+        }
+
+        assertEquals("audio/mpeg", metadata.get(Metadata.CONTENT_TYPE));
+        assertEquals("Test Title", metadata.get(TikaCoreProperties.TITLE));
+        assertEquals("Test Artist", metadata.get(TikaCoreProperties.CREATOR));
+        assertEquals("Test Artist", metadata.get(Metadata.AUTHOR));
+
+        String content = handler.toString();
+        assertContains("Test Title", content);
+        assertContains("Test Artist", content);
+        assertContains("Test Album", content);
+        assertContains("2008", content);
+        assertContains("Test Comment", content);
+        assertContains("Rock", content);
+        assertContains(", disc 1", content);
+        
+        assertEquals("MPEG 3 Layer III Version 1", metadata.get("version"));
+        assertEquals("44100", metadata.get("samplerate"));
+        assertEquals("1", metadata.get("channels"));
+        checkDuration(metadata, 2);
+
+        // Check XMPDM-typed audio properties
+        assertEquals("Test Album", metadata.get(XMPDM.ALBUM));
+        assertEquals("Test Artist", metadata.get(XMPDM.ARTIST));
+        assertEquals("Test Album Artist", metadata.get(XMPDM.ALBUM_ARTIST));
+        assertEquals(null, metadata.get(XMPDM.COMPOSER));
+        assertEquals("2008", metadata.get(XMPDM.RELEASE_DATE));
+        assertEquals("Rock", metadata.get(XMPDM.GENRE));
+        assertEquals("1", metadata.get(XMPDM.COMPILATION));
+        
+        assertEquals(null, metadata.get(XMPDM.TRACK_NUMBER));
+        assertEquals("1", metadata.get(XMPDM.DISC_NUMBER));
+    }
+    
+    /**
+     * Tests that a file with characters not in the ISO 8859-1
+     *  range is correctly handled
+     */
+    @Test
+    public void testMp3ParsingID3i18n() throws Exception {
+       Parser parser = new AutoDetectParser(); // Should auto-detect!
+       ContentHandler handler = new BodyContentHandler();
+       Metadata metadata = new Metadata();
+
+        try (InputStream stream = Mp3ParserTest.class.getResourceAsStream(
+                "/test-documents/testMP3i18n.mp3")) {
+            parser.parse(stream, handler, metadata, new ParseContext());
+        }
+
+       assertEquals("audio/mpeg", metadata.get(Metadata.CONTENT_TYPE));
+       assertEquals("Une chason en Fran\u00e7ais", metadata.get(TikaCoreProperties.TITLE));
+       assertEquals("Test Artist \u2468\u2460", metadata.get(TikaCoreProperties.CREATOR));
+       assertEquals("Test Artist \u2468\u2460", metadata.get(Metadata.AUTHOR));
+       assertEquals("Test Artist \u2468\u2460", metadata.get(XMPDM.ARTIST));
+       assertEquals("Test Album \u2460\u2468", metadata.get(XMPDM.ALBUM));
+
+       assertEquals(
+             "Eng - Comment Desc\nThis is a \u1357\u2468\u2460 Comment", 
+             metadata.get(XMPDM.LOG_COMMENT)
+       );
+       
+       assertEquals("MPEG 3 Layer III Version 1", metadata.get("version"));
+       assertEquals("44100", metadata.get("samplerate"));
+       assertEquals("1", metadata.get("channels"));
+       checkDuration(metadata, 2);
+   }
+    
+    
+    /**
+     * Tests that a file with both lyrics and
+     *  ID3v2 tags gets both extracted correctly
+     */
+    @Test
+    public void testMp3ParsingLyrics() throws Exception {
+        Parser parser = new AutoDetectParser(); // Should auto-detect!
+        ContentHandler handler = new BodyContentHandler();
+        Metadata metadata = new Metadata();
+
+        // Note - our test file has a lyrics tag, but lacks any
+        //  lyrics in the tags, so we can't test that bit
+        // TODO Find a better sample file
+
+        try (InputStream stream = Mp3ParserTest.class.getResourceAsStream(
+                "/test-documents/testMP3lyrics.mp3")) {
+            parser.parse(stream, handler, metadata, new ParseContext());
+        }
+
+        assertEquals("audio/mpeg", metadata.get(Metadata.CONTENT_TYPE));
+        assertEquals("Test Title", metadata.get(TikaCoreProperties.TITLE));
+        assertEquals("Test Artist", metadata.get(TikaCoreProperties.CREATOR));
+        assertEquals("Test Artist", metadata.get(Metadata.AUTHOR));
+
+        String content = handler.toString();
+        assertContains("Test Title", content);
+        assertContains("Test Artist", content);
+        assertContains("Test Album", content);
+        assertContains("2008", content);
+        assertContains("Test Comment", content);
+        assertContains("Rock", content);
+        
+        assertEquals("MPEG 3 Layer III Version 1", metadata.get("version"));
+        assertEquals("44100", metadata.get("samplerate"));
+        assertEquals("2", metadata.get("channels"));
+        checkDuration(metadata, 1);
+    }
+    
+    @Test
+    public void testID3v2Frame() throws Exception {
+       byte[] empty = new byte[] {
+             0x49, 0x44, 0x33, 3, 1, 0,
+             0, 0, 0, 0
+       };
+       
+       assertEquals(11, ID3v2Frame.getInt(new byte[] {0,0,0,0x0b}));
+       assertEquals(257, ID3v2Frame.getInt(new byte[] {0,0,1,1}));
+       
+       ID3v2Frame f = (ID3v2Frame)
+            ID3v2Frame.createFrameIfPresent(new ByteArrayInputStream(empty));
+       assertEquals(3, f.getMajorVersion());
+       assertEquals(1, f.getMinorVersion());
+       assertEquals(0, f.getFlags());
+       assertEquals(0, f.getLength());
+       assertEquals(0, f.getData().length);
+       
+       assertEquals("", ID3v2Frame.getTagString(f.getData(), 0, 0));
+       assertEquals("", ID3v2Frame.getTagString(new byte[] {0,0,0,0}, 0, 3));
+       assertEquals("A", ID3v2Frame.getTagString(new byte[] {(byte)'A',0,0,0}, 0, 3));
+    }
+
+    @Test
+    public void testTIKA1589_noId3ReturnsDurationCorrectly() throws Exception {
+        Parser parser = new AutoDetectParser(); // Should auto-detect!
+        ContentHandler handler = new BodyContentHandler();
+        Metadata metadata = new Metadata();
+
+        try (InputStream stream = Mp3ParserTest.class.getResourceAsStream(
+                "/test-documents/testMP3noid3.mp3")) {
+            parser.parse(stream, handler, metadata, new ParseContext());
+        }
+
+        assertEquals("2455.510986328125", metadata.get(XMPDM.DURATION));
+    }
+    
+    /**
+     * This test will do nothing, unless you've downloaded the
+     *  mp3 file from TIKA-424 - the file cannot be
+     *  distributed with Tika.
+     * This test will check for the complicated set of ID3v2.4
+     *  tags.
+     */
+    @Test
+    public void testTIKA424() throws Exception {
+       Parser parser = new AutoDetectParser(); // Should auto-detect!
+       ContentHandler handler = new BodyContentHandler();
+       Metadata metadata = new Metadata();
+
+        try (InputStream stream = Mp3ParserTest.class.getResourceAsStream(
+                "/test-documents/test2.mp3")) {
+            if (stream == null) {
+                // You haven't downloaded the file
+                // Skip the test
+                return;
+            }
+            parser.parse(stream, handler, metadata, new ParseContext());
+        }
+
+       assertEquals("audio/mpeg", metadata.get(Metadata.CONTENT_TYPE));
+       assertEquals("Plus loin vers l'ouest", metadata.get(TikaCoreProperties.TITLE));
+       assertEquals("Merzhin", metadata.get(TikaCoreProperties.CREATOR));
+       assertEquals("Merzhin", metadata.get(Metadata.AUTHOR));
+
+       String content = handler.toString();
+       assertContains("Plus loin vers l'ouest", content);
+       
+       assertEquals("MPEG 3 Layer III Version 1", metadata.get("version"));
+       assertEquals("44100", metadata.get("samplerate"));
+       assertEquals("2", metadata.get("channels"));
+    }
+    
+    /**
+     * This tests that we can handle without errors (but perhaps not
+     *  all content) a file with a very very large ID3 frame that
+     *  has been truncated before the end of the ID3 tags.
+     * In this case, it is a file with JPEG data in the ID3, which
+     *  is truncated before the end of the JPEG bit of the ID3 frame.
+     */
+    @Test
+    public void testTIKA474() throws Exception {
+       Parser parser = new AutoDetectParser(); // Should auto-detect!
+       ContentHandler handler = new BodyContentHandler();
+       Metadata metadata = new Metadata();
+
+        try (InputStream stream = Mp3ParserTest.class.getResourceAsStream(
+                "/test-documents/testMP3truncated.mp3")) {
+            parser.parse(stream, handler, metadata, new ParseContext());
+        }
+
+       // Check we could get the headers from the start
+       assertEquals("audio/mpeg", metadata.get(Metadata.CONTENT_TYPE));
+       assertEquals("Girl you have no faith in medicine", metadata.get(TikaCoreProperties.TITLE));
+       assertEquals("The White Stripes", metadata.get(TikaCoreProperties.CREATOR));
+       assertEquals("The White Stripes", metadata.get(Metadata.AUTHOR));
+
+       String content = handler.toString();
+       assertContains("Girl you have no faith in medicine", content);
+       assertContains("The White Stripes", content);
+       assertContains("Elephant", content);
+       assertContains("2003", content);
+       
+       // File lacks any audio frames, so we can't know these
+       assertEquals(null, metadata.get("version"));
+       assertEquals(null, metadata.get("samplerate"));
+       assertEquals(null, metadata.get("channels"));
+    }
+
+    // TIKA-1024
+    @Test
+    public void testNakedUTF16BOM() throws Exception {
+       Parser parser = new AutoDetectParser(); // Should auto-detect!
+       ContentHandler handler = new BodyContentHandler();
+       Metadata metadata = new Metadata();
+
+        try (InputStream stream = Mp3ParserTest.class.getResourceAsStream(
+                "/test-documents/testNakedUTF16BOM.mp3")) {
+            parser.parse(stream, handler, metadata, new ParseContext());
+        }
+       assertEquals("audio/mpeg", metadata.get(Metadata.CONTENT_TYPE));
+       assertEquals("", metadata.get(XMPDM.GENRE));
+    }
+}

http://git-wip-us.apache.org/repos/asf/tika/blob/c7a6bcac/tika-parser-modules/tika-parser-multimedia-module/src/test/java/org/apache/tika/parser/ocr/TesseractOCRConfigTest.java
----------------------------------------------------------------------
diff --git a/tika-parser-modules/tika-parser-multimedia-module/src/test/java/org/apache/tika/parser/ocr/TesseractOCRConfigTest.java b/tika-parser-modules/tika-parser-multimedia-module/src/test/java/org/apache/tika/parser/ocr/TesseractOCRConfigTest.java
index 36c0efe..aeaf71e 100644
--- a/tika-parser-modules/tika-parser-multimedia-module/src/test/java/org/apache/tika/parser/ocr/TesseractOCRConfigTest.java
+++ b/tika-parser-modules/tika-parser-multimedia-module/src/test/java/org/apache/tika/parser/ocr/TesseractOCRConfigTest.java
@@ -1,92 +1,92 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.parser.ocr;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertTrue;
-
-import java.io.File;
-import java.io.InputStream;
-
-import org.apache.tika.TikaTest;
-import org.junit.Test;
-
-public class TesseractOCRConfigTest extends TikaTest {
-
-    @Test
-    public void testNoConfig() throws Exception {
-        TesseractOCRConfig config = new TesseractOCRConfig();
-        assertEquals("Invalid default tesseractPath value", "", config.getTesseractPath());
-        assertEquals("Invalid default tessdataPath value", "", config.getTessdataPath());
-        assertEquals("Invalid default language value", "eng", config.getLanguage());
-        assertEquals("Invalid default pageSegMode value", "1", config.getPageSegMode());
-        assertEquals("Invalid default minFileSizeToOcr value", 0, config.getMinFileSizeToOcr());
-        assertEquals("Invalid default maxFileSizeToOcr value", Integer.MAX_VALUE, config.getMaxFileSizeToOcr());
-        assertEquals("Invalid default timeout value", 120, config.getTimeout());
-    }
-
-    @Test
-    public void testPartialConfig() throws Exception {
-
-        InputStream stream = TesseractOCRConfigTest.class.getResourceAsStream(
-                "/test-properties/TesseractOCRConfig-partial.properties");
-
-        TesseractOCRConfig config = new TesseractOCRConfig(stream);
-        assertEquals("Invalid default tesseractPath value", "", config.getTesseractPath());
-        assertEquals("Invalid default tessdataPath value", "", config.getTessdataPath());
-        assertEquals("Invalid overridden language value", "fra+deu", config.getLanguage());
-        assertEquals("Invalid default pageSegMode value", "1", config.getPageSegMode());
-        assertEquals("Invalid overridden minFileSizeToOcr value", 1, config.getMinFileSizeToOcr());
-        assertEquals("Invalid default maxFileSizeToOcr value", Integer.MAX_VALUE, config.getMaxFileSizeToOcr());
-        assertEquals("Invalid overridden timeout value", 240, config.getTimeout());
-    }
-
-    @Test
-    public void testFullConfig() throws Exception {
-
-        InputStream stream = TesseractOCRConfigTest.class.getResourceAsStream(
-                "/test-properties/TesseractOCRConfig-full.properties");
-
-        TesseractOCRConfig config = new TesseractOCRConfig(stream);
-        assertEquals("Invalid overridden tesseractPath value", "/opt/tesseract" + File.separator, config.getTesseractPath());
-        assertEquals("Invalid overridden tesseractPath value", "/usr/local/share" + File.separator, config.getTessdataPath());
-        assertEquals("Invalid overridden language value", "fra+deu", config.getLanguage());
-        assertEquals("Invalid overridden pageSegMode value", "2", config.getPageSegMode());
-        assertEquals("Invalid overridden minFileSizeToOcr value", 1, config.getMinFileSizeToOcr());
-        assertEquals("Invalid overridden maxFileSizeToOcr value", 2000000, config.getMaxFileSizeToOcr());
-        assertEquals("Invalid overridden timeout value", 240, config.getTimeout());
-    }
-
-    @Test(expected=IllegalArgumentException.class)
-    public void testValidateLanguage() {
-        TesseractOCRConfig config = new TesseractOCRConfig();
-        config.setLanguage("eng");
-        config.setLanguage("eng+fra");
-        assertTrue("Couldn't set valid values", true);
-        config.setLanguage("rm -Rf *");
-    }
-
-    @Test(expected=IllegalArgumentException.class)
-    public void testValidatePageSegMode() {
-        TesseractOCRConfig config = new TesseractOCRConfig();
-        config.setPageSegMode("0");
-        config.setPageSegMode("10");
-        assertTrue("Couldn't set valid values", true);
-        config.setPageSegMode("11");
-    }
-
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.ocr;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import java.io.File;
+import java.io.InputStream;
+
+import org.apache.tika.TikaTest;
+import org.junit.Test;
+
+public class TesseractOCRConfigTest extends TikaTest {
+
+    @Test
+    public void testNoConfig() throws Exception {
+        TesseractOCRConfig config = new TesseractOCRConfig();
+        assertEquals("Invalid default tesseractPath value", "", config.getTesseractPath());
+        assertEquals("Invalid default tessdataPath value", "", config.getTessdataPath());
+        assertEquals("Invalid default language value", "eng", config.getLanguage());
+        assertEquals("Invalid default pageSegMode value", "1", config.getPageSegMode());
+        assertEquals("Invalid default minFileSizeToOcr value", 0, config.getMinFileSizeToOcr());
+        assertEquals("Invalid default maxFileSizeToOcr value", Integer.MAX_VALUE, config.getMaxFileSizeToOcr());
+        assertEquals("Invalid default timeout value", 120, config.getTimeout());
+    }
+
+    @Test
+    public void testPartialConfig() throws Exception {
+
+        InputStream stream = TesseractOCRConfigTest.class.getResourceAsStream(
+                "/test-properties/TesseractOCRConfig-partial.properties");
+
+        TesseractOCRConfig config = new TesseractOCRConfig(stream);
+        assertEquals("Invalid default tesseractPath value", "", config.getTesseractPath());
+        assertEquals("Invalid default tessdataPath value", "", config.getTessdataPath());
+        assertEquals("Invalid overridden language value", "fra+deu", config.getLanguage());
+        assertEquals("Invalid default pageSegMode value", "1", config.getPageSegMode());
+        assertEquals("Invalid overridden minFileSizeToOcr value", 1, config.getMinFileSizeToOcr());
+        assertEquals("Invalid default maxFileSizeToOcr value", Integer.MAX_VALUE, config.getMaxFileSizeToOcr());
+        assertEquals("Invalid overridden timeout value", 240, config.getTimeout());
+    }
+
+    @Test
+    public void testFullConfig() throws Exception {
+        // The "full" properties file is expected to override every default asserted below.
+        InputStream stream = TesseractOCRConfigTest.class.getResourceAsStream(
+                "/test-properties/TesseractOCRConfig-full.properties");
+
+        TesseractOCRConfig config = new TesseractOCRConfig(stream);
+        assertEquals("Invalid overridden tesseractPath value", "/opt/tesseract" + File.separator, config.getTesseractPath());
+        assertEquals("Invalid overridden tessdataPath value", "/usr/local/share" + File.separator, config.getTessdataPath());
+        assertEquals("Invalid overridden language value", "fra+deu", config.getLanguage());
+        assertEquals("Invalid overridden pageSegMode value", "2", config.getPageSegMode());
+        assertEquals("Invalid overridden minFileSizeToOcr value", 1, config.getMinFileSizeToOcr());
+        assertEquals("Invalid overridden maxFileSizeToOcr value", 2000000, config.getMaxFileSizeToOcr());
+        assertEquals("Invalid overridden timeout value", 240, config.getTimeout());
+    }
+
+    @Test(expected=IllegalArgumentException.class)
+    public void testValidateLanguage() {
+        TesseractOCRConfig config = new TesseractOCRConfig();
+        config.setLanguage("eng");
+        config.setLanguage("eng+fra");
+        assertTrue("Couldn't set valid values", true);
+        config.setLanguage("rm -Rf *");
+    }
+
+    @Test(expected=IllegalArgumentException.class)
+    public void testValidatePageSegMode() {
+        TesseractOCRConfig config = new TesseractOCRConfig();
+        config.setPageSegMode("0");
+        config.setPageSegMode("10");
+        assertTrue("Couldn't set valid values", true);
+        config.setPageSegMode("11");
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/tika/blob/c7a6bcac/tika-parser-modules/tika-parser-multimedia-module/src/test/java/org/apache/tika/parser/video/FLVParserTest.java
----------------------------------------------------------------------
diff --git a/tika-parser-modules/tika-parser-multimedia-module/src/test/java/org/apache/tika/parser/video/FLVParserTest.java b/tika-parser-modules/tika-parser-multimedia-module/src/test/java/org/apache/tika/parser/video/FLVParserTest.java
index 147113e..d3a876e 100644
--- a/tika-parser-modules/tika-parser-multimedia-module/src/test/java/org/apache/tika/parser/video/FLVParserTest.java
+++ b/tika-parser-modules/tika-parser-multimedia-module/src/test/java/org/apache/tika/parser/video/FLVParserTest.java
@@ -1,44 +1,44 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.parser.video;
-
-import static org.junit.Assert.assertEquals;
-
-import org.apache.tika.Tika;
-import org.apache.tika.metadata.Metadata;
-import org.junit.Test;
-
-public class FLVParserTest {
-
-    @Test
-    public void testFLV() throws Exception {
-        String path = "/test-documents/testFLV.flv";
-        Metadata metadata = new Metadata();
-
-        String content = new Tika().parseToString(
-                FLVParserTest.class.getResourceAsStream(path), metadata);
-
-        assertEquals("", content);
-        assertEquals("video/x-flv", metadata.get(Metadata.CONTENT_TYPE));
-        assertEquals("true", metadata.get("hasVideo"));
-        assertEquals("false", metadata.get("stereo"));
-        assertEquals("true", metadata.get("hasAudio"));
-        assertEquals("120.0", metadata.get("height"));
-        assertEquals("16.0", metadata.get("audiosamplesize"));
-    }
-
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.video;
+
+import static org.junit.Assert.assertEquals;
+
+import org.apache.tika.Tika;
+import org.apache.tika.metadata.Metadata;
+import org.junit.Test;
+
+public class FLVParserTest {
+
+    @Test
+    public void testFLV() throws Exception {
+        String path = "/test-documents/testFLV.flv";
+        Metadata metadata = new Metadata();
+
+        String content = new Tika().parseToString(
+                FLVParserTest.class.getResourceAsStream(path), metadata);
+
+        assertEquals("", content);
+        assertEquals("video/x-flv", metadata.get(Metadata.CONTENT_TYPE));
+        assertEquals("true", metadata.get("hasVideo"));
+        assertEquals("false", metadata.get("stereo"));
+        assertEquals("true", metadata.get("hasAudio"));
+        assertEquals("120.0", metadata.get("height"));
+        assertEquals("16.0", metadata.get("audiosamplesize"));
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/tika/blob/c7a6bcac/tika-parser-modules/tika-parser-office-module/pom.xml
----------------------------------------------------------------------
diff --git a/tika-parser-modules/tika-parser-office-module/pom.xml b/tika-parser-modules/tika-parser-office-module/pom.xml
index 4756328..4825076 100644
--- a/tika-parser-modules/tika-parser-office-module/pom.xml
+++ b/tika-parser-modules/tika-parser-office-module/pom.xml
@@ -1,126 +1,126 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor 
-  license agreements. See the NOTICE file distributed with this work for additional 
-  information regarding copyright ownership. The ASF licenses this file to 
-  you under the Apache License, Version 2.0 (the "License"); you may not use 
-  this file except in compliance with the License. You may obtain a copy of 
-  the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required 
-  by applicable law or agreed to in writing, software distributed under the 
-  License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS 
-  OF ANY KIND, either express or implied. See the License for the specific 
-  language governing permissions and limitations under the License. -->
-<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
-  xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
-  <modelVersion>4.0.0</modelVersion>
-
-  <parent>
-    <groupId>org.apache.tika</groupId>
-    <artifactId>tika-parser-modules</artifactId>
-    <version>2.0-SNAPSHOT</version>
-  </parent>
-
-  <artifactId>tika-parser-office-module</artifactId>
-  <name>Apache Tika parser office module</name>
-  <url>http://tika.apache.org/</url>
-  
-  <dependencies>
-    <dependency>
-      <groupId>${project.groupId}</groupId>
-      <artifactId>tika-core</artifactId>
-      <version>${project.version}</version>
-    </dependency>
-    <dependency>
-      <groupId>commons-io</groupId>
-      <artifactId>commons-io</artifactId>
-      <version>${commons.io.version}</version>
-    </dependency>
-    <dependency>
-      <groupId>org.apache.poi</groupId>
-      <artifactId>poi</artifactId>
-      <version>${poi.version}</version>
-    </dependency>
-    <dependency>
-      <groupId>org.apache.poi</groupId>
-      <artifactId>poi-scratchpad</artifactId>
-      <version>${poi.version}</version>
-    </dependency>
-    <dependency>
-      <groupId>org.apache.poi</groupId>
-      <artifactId>poi-ooxml</artifactId>
-      <version>${poi.version}</version>
-      <exclusions>
-        <exclusion>
-          <groupId>stax</groupId>
-          <artifactId>stax-api</artifactId>
-        </exclusion>
-        <exclusion>
-          <groupId>xml-apis</groupId>
-          <artifactId>xml-apis</artifactId>
-        </exclusion>
-      </exclusions>
-    </dependency>
-    <dependency>
-      <groupId>com.healthmarketscience.jackcess</groupId>
-      <artifactId>jackcess</artifactId>
-      <version>2.1.3</version>
-    </dependency>
-    <dependency>
-      <groupId>com.healthmarketscience.jackcess</groupId>
-      <artifactId>jackcess-encrypt</artifactId>
-      <version>2.1.1</version>
-      <exclusions>
-        <exclusion>
-          <groupId>org.bouncycastle</groupId>
-          <artifactId>bcprov-jdk15on</artifactId>
-        </exclusion>
-      </exclusions>
-    </dependency>
-    <!-- PDFBox (in the tika-parser-pdf-module) and poi's ooxml
-         code relies on bouncy castle, as does jackcess-encrypt
-         Need to exclude the older library and include the newer one
-         if there is a conflict.
-         -->
-    <dependency>
-      <groupId>org.bouncycastle</groupId>
-      <artifactId>bcprov-jdk15on</artifactId>
-      <version>${bouncycastle.version}</version>
-    </dependency>
-    <dependency>
-      <groupId>com.pff</groupId>
-      <artifactId>java-libpst</artifactId>
-      <version>0.8.1</version>
-    </dependency>
-    <dependency>
-      <groupId>${project.groupId}</groupId>
-      <artifactId>tika-parser-package-module</artifactId>
-      <version>${project.version}</version>
-      <scope>test</scope>
-    </dependency>
-    <dependency>
-      <groupId>${project.groupId}</groupId>
-      <artifactId>tika-parser-web-module</artifactId>
-      <version>${project.version}</version>
-      <scope>test</scope>
-    </dependency>
-    <dependency>
-      <groupId>${project.groupId}</groupId>
-      <artifactId>tika-parser-text-module</artifactId>
-      <version>${project.version}</version>
-    </dependency>
-    <dependency>
-      <groupId>org.slf4j</groupId>
-      <artifactId>slf4j-log4j12</artifactId>
-      <scope>test</scope>
-    </dependency>
-  </dependencies>
-  
-  <build>
-    <plugins>
-      <plugin>
-        <groupId>org.apache.maven.plugins</groupId>
-        <artifactId>maven-dependency-plugin</artifactId>
-      </plugin>
-    </plugins>
-  </build>
-
+<?xml version="1.0" encoding="UTF-8"?>
+<!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor 
+  license agreements. See the NOTICE file distributed with this work for additional 
+  information regarding copyright ownership. The ASF licenses this file to 
+  you under the Apache License, Version 2.0 (the "License"); you may not use 
+  this file except in compliance with the License. You may obtain a copy of 
+  the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required 
+  by applicable law or agreed to in writing, software distributed under the 
+  License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS 
+  OF ANY KIND, either express or implied. See the License for the specific 
+  language governing permissions and limitations under the License. -->
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+  xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+  <modelVersion>4.0.0</modelVersion>
+
+  <parent>
+    <groupId>org.apache.tika</groupId>
+    <artifactId>tika-parser-modules</artifactId>
+    <version>2.0-SNAPSHOT</version>
+  </parent>
+
+  <artifactId>tika-parser-office-module</artifactId>
+  <name>Apache Tika parser office module</name>
+  <url>http://tika.apache.org/</url>
+  
+  <dependencies>
+    <dependency>
+      <groupId>${project.groupId}</groupId>
+      <artifactId>tika-core</artifactId>
+      <version>${project.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>commons-io</groupId>
+      <artifactId>commons-io</artifactId>
+      <version>${commons.io.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.poi</groupId>
+      <artifactId>poi</artifactId>
+      <version>${poi.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.poi</groupId>
+      <artifactId>poi-scratchpad</artifactId>
+      <version>${poi.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.poi</groupId>
+      <artifactId>poi-ooxml</artifactId>
+      <version>${poi.version}</version>
+      <exclusions>
+        <exclusion>
+          <groupId>stax</groupId>
+          <artifactId>stax-api</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>xml-apis</groupId>
+          <artifactId>xml-apis</artifactId>
+        </exclusion>
+      </exclusions>
+    </dependency>
+    <dependency>
+      <groupId>com.healthmarketscience.jackcess</groupId>
+      <artifactId>jackcess</artifactId>
+      <version>2.1.3</version>
+    </dependency>
+    <dependency>
+      <groupId>com.healthmarketscience.jackcess</groupId>
+      <artifactId>jackcess-encrypt</artifactId>
+      <version>2.1.1</version>
+      <exclusions>
+        <exclusion>
+          <groupId>org.bouncycastle</groupId>
+          <artifactId>bcprov-jdk15on</artifactId>
+        </exclusion>
+      </exclusions>
+    </dependency>
+    <!-- PDFBox (in the tika-parser-pdf-module) and poi's ooxml
+         code rely on bouncy castle, as does jackcess-encrypt.
+         Need to exclude the older library and include the newer one
+         if there is a conflict.
+         -->
+    <dependency>
+      <groupId>org.bouncycastle</groupId>
+      <artifactId>bcprov-jdk15on</artifactId>
+      <version>${bouncycastle.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>com.pff</groupId>
+      <artifactId>java-libpst</artifactId>
+      <version>0.8.1</version>
+    </dependency>
+    <dependency>
+      <groupId>${project.groupId}</groupId>
+      <artifactId>tika-parser-package-module</artifactId>
+      <version>${project.version}</version>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>${project.groupId}</groupId>
+      <artifactId>tika-parser-web-module</artifactId>
+      <version>${project.version}</version>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>${project.groupId}</groupId>
+      <artifactId>tika-parser-text-module</artifactId>
+      <version>${project.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>org.slf4j</groupId>
+      <artifactId>slf4j-log4j12</artifactId>
+      <scope>test</scope>
+    </dependency>
+  </dependencies>
+  
+  <build>
+    <plugins>
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-dependency-plugin</artifactId>
+      </plugin>
+    </plugins>
+  </build>
+
 </project>
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tika/blob/c7a6bcac/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/module/office/internal/Activator.java
----------------------------------------------------------------------
diff --git a/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/module/office/internal/Activator.java b/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/module/office/internal/Activator.java
index 32a41ab..8f34381 100644
--- a/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/module/office/internal/Activator.java
+++ b/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/module/office/internal/Activator.java
@@ -1,36 +1,36 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.module.office.internal;
-
-import org.apache.tika.osgi.TikaAbstractBundleActivator;
-import org.osgi.framework.BundleContext;
-
-public class Activator extends TikaAbstractBundleActivator {
-
-    @Override
-    public void start(BundleContext context) throws Exception {
-
-        registerTikaParserServiceLoader(context, Activator.class.getClassLoader());
-
-    }
-
-    @Override
-    public void stop(BundleContext context) throws Exception {
-
-    }
-
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.module.office.internal;
+
+import org.apache.tika.osgi.TikaAbstractBundleActivator;
+import org.osgi.framework.BundleContext;
+
+public class Activator extends TikaAbstractBundleActivator {
+
+    @Override
+    public void start(BundleContext context) throws Exception {
+
+        registerTikaParserServiceLoader(context, Activator.class.getClassLoader());
+
+    }
+
+    @Override
+    public void stop(BundleContext context) throws Exception {
+
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/tika/blob/c7a6bcac/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/chm/ChmParser.java
----------------------------------------------------------------------
diff --git a/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/chm/ChmParser.java b/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/chm/ChmParser.java
index c3e85c1..94c5aa5 100644
--- a/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/chm/ChmParser.java
+++ b/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/chm/ChmParser.java
@@ -1,112 +1,112 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.parser.chm;
-
-import java.io.ByteArrayInputStream;
-import java.io.IOException;
-import java.io.InputStream;
-import java.util.Arrays;
-import java.util.Collections;
-import java.util.HashSet;
-import java.util.Set;
-
-import org.apache.tika.exception.TikaException;
-import org.apache.tika.metadata.Metadata;
-import org.apache.tika.mime.MediaType;
-import org.apache.tika.parser.AbstractParser;
-import org.apache.tika.parser.ParseContext;
-import org.apache.tika.parser.Parser;
-import org.apache.tika.parser.ParserProxy;
-import org.apache.tika.parser.chm.accessor.DirectoryListingEntry;
-import org.apache.tika.parser.chm.core.ChmExtractor;
-import org.apache.tika.sax.BodyContentHandler;
-import org.apache.tika.sax.EmbeddedContentHandler;
-import org.apache.tika.sax.XHTMLContentHandler;
-import org.xml.sax.ContentHandler;
-import org.xml.sax.SAXException;
-
-public class ChmParser extends AbstractParser {
-
-    /** Serial version UID */
-    private static final long serialVersionUID = 5938777307516469802L;
-
-    private static final Set<MediaType> SUPPORTED_TYPES =
-            Collections.unmodifiableSet(new HashSet<MediaType>(Arrays.asList(
-                    MediaType.application("vnd.ms-htmlhelp"),
-                    MediaType.application("chm"),
-                    MediaType.application("x-chm"))));
-
-    private final Parser htmlProxy;
-    
-    public ChmParser() {
-        this.htmlProxy = createParserProxy("org.apache.tika.parser.html.HtmlParser");
-    }
-    @Override
-    public Set<MediaType> getSupportedTypes(ParseContext context) {
-        return SUPPORTED_TYPES;
-    }
-
-    @Override
-    public void parse(InputStream stream, ContentHandler handler,
-            Metadata metadata, ParseContext context) throws IOException,
-            SAXException, TikaException {
-        ChmExtractor chmExtractor = new ChmExtractor(stream);
-
-        // metadata
-        metadata.set(Metadata.CONTENT_TYPE, "application/vnd.ms-htmlhelp");
-
-        // content
-        XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
-        xhtml.startDocument();
-
-        for (DirectoryListingEntry entry : chmExtractor.getChmDirList().getDirectoryListingEntryList()) {
-            final String entryName = entry.getName();
-            if (entryName.endsWith(".html") 
-                    || entryName.endsWith(".htm")
-            ) {
-//                AttributesImpl attrs = new AttributesImpl();
-//                attrs.addAttribute("", "name", "name", "String", entryName);
-//                xhtml.startElement("", "document", "document", attrs);
-                
-                byte[] data = chmExtractor.extractChmEntry(entry);
-
-                parsePage(data, xhtml);
-                
-//                xhtml.endElement("", "", "document");
-            }
-        }
-
-        xhtml.endDocument();
-    }
-
-
-    private void parsePage(byte[] byteObject, ContentHandler xhtml) throws TikaException {// throws IOException
-        InputStream stream = null;
-        Metadata metadata = new Metadata();
-        ContentHandler handler = new EmbeddedContentHandler(new BodyContentHandler(xhtml));// -1
-        ParseContext parser = new ParseContext();
-        try {
-            stream = new ByteArrayInputStream(byteObject);
-            htmlProxy.parse(stream, handler, metadata, parser);
-        } catch (SAXException e) {
-            throw new RuntimeException(e);
-        } catch (IOException e) {
-            // Pushback overflow from tagsoup
-        }
-    }
-    
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.chm;
+
+import java.io.ByteArrayInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.Set;
+
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.mime.MediaType;
+import org.apache.tika.parser.AbstractParser;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.parser.Parser;
+import org.apache.tika.parser.ParserProxy;
+import org.apache.tika.parser.chm.accessor.DirectoryListingEntry;
+import org.apache.tika.parser.chm.core.ChmExtractor;
+import org.apache.tika.sax.BodyContentHandler;
+import org.apache.tika.sax.EmbeddedContentHandler;
+import org.apache.tika.sax.XHTMLContentHandler;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.SAXException;
+
+public class ChmParser extends AbstractParser {
+
+    /** Serial version UID */
+    private static final long serialVersionUID = 5938777307516469802L;
+
+    private static final Set<MediaType> SUPPORTED_TYPES =
+            Collections.unmodifiableSet(new HashSet<MediaType>(Arrays.asList(
+                    MediaType.application("vnd.ms-htmlhelp"),
+                    MediaType.application("chm"),
+                    MediaType.application("x-chm"))));
+
+    private final Parser htmlProxy;
+    
+    public ChmParser() {
+        this.htmlProxy = createParserProxy("org.apache.tika.parser.html.HtmlParser");
+    }
+    @Override
+    public Set<MediaType> getSupportedTypes(ParseContext context) {
+        return SUPPORTED_TYPES;
+    }
+
+    @Override
+    public void parse(InputStream stream, ContentHandler handler,
+            Metadata metadata, ParseContext context) throws IOException,
+            SAXException, TikaException {
+        ChmExtractor chmExtractor = new ChmExtractor(stream);
+
+        // metadata
+        metadata.set(Metadata.CONTENT_TYPE, "application/vnd.ms-htmlhelp");
+
+        // content
+        XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
+        xhtml.startDocument();
+
+        for (DirectoryListingEntry entry : chmExtractor.getChmDirList().getDirectoryListingEntryList()) {
+            final String entryName = entry.getName();
+            if (entryName.endsWith(".html") 
+                    || entryName.endsWith(".htm")
+            ) {
+//                AttributesImpl attrs = new AttributesImpl();
+//                attrs.addAttribute("", "name", "name", "String", entryName);
+//                xhtml.startElement("", "document", "document", attrs);
+                
+                byte[] data = chmExtractor.extractChmEntry(entry);
+
+                parsePage(data, xhtml);
+                
+//                xhtml.endElement("", "", "document");
+            }
+        }
+
+        xhtml.endDocument();
+    }
+
+
+    private void parsePage(byte[] byteObject, ContentHandler xhtml) throws TikaException {// throws IOException
+        InputStream stream = null;
+        Metadata metadata = new Metadata();
+        ContentHandler handler = new EmbeddedContentHandler(new BodyContentHandler(xhtml));// -1
+        ParseContext parser = new ParseContext();
+        try {
+            stream = new ByteArrayInputStream(byteObject);
+            htmlProxy.parse(stream, handler, metadata, parser);
+        } catch (SAXException e) {
+            throw new RuntimeException(e);
+        } catch (IOException e) {
+            // Pushback overflow from tagsoup
+        }
+    }
+    
+}

http://git-wip-us.apache.org/repos/asf/tika/blob/c7a6bcac/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/chm/accessor/ChmAccessor.java
----------------------------------------------------------------------
diff --git a/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/chm/accessor/ChmAccessor.java b/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/chm/accessor/ChmAccessor.java
index 42b0830..e8bf1cc 100644
--- a/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/chm/accessor/ChmAccessor.java
+++ b/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/chm/accessor/ChmAccessor.java
@@ -1,39 +1,39 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.parser.chm.accessor;
-
-import java.io.Serializable;
-
-import org.apache.tika.exception.TikaException;
-
-/**
- * 
- * Defines an accessor interface
- * 
- * @param <T>
- */
-public interface ChmAccessor<T> extends Serializable {
-    /**
-     * Parses chm accessor
-     * 
-     * @param data
-     *            chm file
-     * @param chmAccessor
-     * @throws TikaException 
-     */
-    void parse(byte[] data, T chmAccessor) throws TikaException;
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.chm.accessor;
+
+import java.io.Serializable;
+
+import org.apache.tika.exception.TikaException;
+
+/**
+ * 
+ * Defines an accessor interface
+ * 
+ * @param <T>
+ */
+public interface ChmAccessor<T> extends Serializable {
+    /**
+     * Parses chm accessor
+     * 
+     * @param data
+     *            chm file
+     * @param chmAccessor
+     * @throws TikaException 
+     */
+    void parse(byte[] data, T chmAccessor) throws TikaException;
+}