You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2016/06/29 11:11:35 UTC
[29/39] tika git commit: Convert new lines from windows to unix
http://git-wip-us.apache.org/repos/asf/tika/blob/c7a6bcac/tika-parser-modules/tika-parser-multimedia-module/src/test/java/org/apache/tika/parser/mp3/Mp3ParserTest.java
----------------------------------------------------------------------
diff --git a/tika-parser-modules/tika-parser-multimedia-module/src/test/java/org/apache/tika/parser/mp3/Mp3ParserTest.java b/tika-parser-modules/tika-parser-multimedia-module/src/test/java/org/apache/tika/parser/mp3/Mp3ParserTest.java
index 4105dfa..1c615f6 100644
--- a/tika-parser-modules/tika-parser-multimedia-module/src/test/java/org/apache/tika/parser/mp3/Mp3ParserTest.java
+++ b/tika-parser-modules/tika-parser-multimedia-module/src/test/java/org/apache/tika/parser/mp3/Mp3ParserTest.java
@@ -1,414 +1,414 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.parser.mp3;
-
-import static org.apache.tika.TikaTest.assertContains;
-import static org.junit.Assert.assertEquals;
-
-import java.io.ByteArrayInputStream;
-import java.io.InputStream;
-
-import org.apache.tika.metadata.Metadata;
-import org.apache.tika.metadata.TikaCoreProperties;
-import org.apache.tika.metadata.XMPDM;
-import org.apache.tika.parser.AutoDetectParser;
-import org.apache.tika.parser.ParseContext;
-import org.apache.tika.parser.Parser;
-import org.apache.tika.sax.BodyContentHandler;
-import org.junit.Test;
-import org.xml.sax.ContentHandler;
-
-/**
- * Test case for parsing mp3 files.
- */
-public class Mp3ParserTest {
-
- /**
- * Checks the duration of an MP3 file.
- * @param metadata the metadata object
- * @param expected the expected duration, rounded as seconds
- */
- private static void checkDuration(Metadata metadata, int expected) {
- assertEquals("Wrong duration", expected,
- Math.round(Float.valueOf(metadata.get(XMPDM.DURATION)) / 1000));
- }
-
- /**
- * Test that with only ID3v1 tags, we get some information out
- */
- @Test
- public void testMp3ParsingID3v1() throws Exception {
- Parser parser = new AutoDetectParser(); // Should auto-detect!
- ContentHandler handler = new BodyContentHandler();
- Metadata metadata = new Metadata();
-
- try (InputStream stream = Mp3ParserTest.class.getResourceAsStream(
- "/test-documents/testMP3id3v1.mp3")) {
- parser.parse(stream, handler, metadata, new ParseContext());
- }
-
- assertEquals("audio/mpeg", metadata.get(Metadata.CONTENT_TYPE));
- assertEquals("Test Title", metadata.get(TikaCoreProperties.TITLE));
- assertEquals("Test Artist", metadata.get(TikaCoreProperties.CREATOR));
- assertEquals("Test Artist", metadata.get(Metadata.AUTHOR));
-
- String content = handler.toString();
- assertContains("Test Title", content);
- assertContains("Test Artist", content);
- assertContains("Test Album", content);
- assertContains("2008", content);
- assertContains("Test Comment", content);
- assertContains("Rock", content);
-
- assertEquals("MPEG 3 Layer III Version 1", metadata.get("version"));
- assertEquals("44100", metadata.get("samplerate"));
- assertEquals("1", metadata.get("channels"));
- checkDuration(metadata, 2);
- }
-
- /**
- * Test that with only ID3v2 tags, we get the full
- * set of information out.
- */
- @Test
- public void testMp3ParsingID3v2() throws Exception {
- Parser parser = new AutoDetectParser(); // Should auto-detect!
- ContentHandler handler = new BodyContentHandler();
- Metadata metadata = new Metadata();
-
- try (InputStream stream = Mp3ParserTest.class.getResourceAsStream(
- "/test-documents/testMP3id3v2.mp3")) {
- parser.parse(stream, handler, metadata, new ParseContext());
- }
-
- // Check core properties
- assertEquals("audio/mpeg", metadata.get(Metadata.CONTENT_TYPE));
- assertEquals("Test Title", metadata.get(TikaCoreProperties.TITLE));
- assertEquals("Test Artist", metadata.get(TikaCoreProperties.CREATOR));
- assertEquals("Test Artist", metadata.get(Metadata.AUTHOR));
-
- // Check the textual contents
- String content = handler.toString();
- assertContains("Test Title", content);
- assertContains("Test Artist", content);
- assertContains("Test Album", content);
- assertContains("2008", content);
- assertContains("Test Comment", content);
- assertContains("Rock", content);
- assertContains(", track 1", content);
- assertContains(", disc 1", content);
-
- // Check un-typed audio properties
- assertEquals("MPEG 3 Layer III Version 1", metadata.get("version"));
- assertEquals("44100", metadata.get("samplerate"));
- assertEquals("1", metadata.get("channels"));
-
- // Check XMPDM-typed audio properties
- assertEquals("Test Album", metadata.get(XMPDM.ALBUM));
- assertEquals("Test Artist", metadata.get(XMPDM.ARTIST));
- assertEquals("Test Album Artist", metadata.get(XMPDM.ALBUM_ARTIST));
- assertEquals(null, metadata.get(XMPDM.COMPOSER));
- assertEquals("2008", metadata.get(XMPDM.RELEASE_DATE));
- assertEquals("Rock", metadata.get(XMPDM.GENRE));
- assertEquals("XXX - ID3v1 Comment\nTest Comment", metadata.get(XMPDM.LOG_COMMENT.getName()));
- assertEquals("1", metadata.get(XMPDM.TRACK_NUMBER));
- assertEquals("1/1", metadata.get(XMPDM.DISC_NUMBER));
- assertEquals("1", metadata.get(XMPDM.COMPILATION));
-
- assertEquals("44100", metadata.get(XMPDM.AUDIO_SAMPLE_RATE));
- assertEquals("Mono", metadata.get(XMPDM.AUDIO_CHANNEL_TYPE));
- assertEquals("MP3", metadata.get(XMPDM.AUDIO_COMPRESSOR));
- checkDuration(metadata, 2);
- }
-
- /**
- * Test that with both id3v2 and id3v1, we prefer the
- * details from id3v2
- */
- @Test
- public void testMp3ParsingID3v1v2() throws Exception {
- Parser parser = new AutoDetectParser(); // Should auto-detect!
- ContentHandler handler = new BodyContentHandler();
- Metadata metadata = new Metadata();
-
- try (InputStream stream = Mp3ParserTest.class.getResourceAsStream(
- "/test-documents/testMP3id3v1_v2.mp3")) {
- parser.parse(stream, handler, metadata, new ParseContext());
- }
-
- assertEquals("audio/mpeg", metadata.get(Metadata.CONTENT_TYPE));
- assertEquals("Test Title", metadata.get(TikaCoreProperties.TITLE));
- assertEquals("Test Artist", metadata.get(TikaCoreProperties.CREATOR));
- assertEquals("Test Artist", metadata.get(Metadata.AUTHOR));
-
- String content = handler.toString();
- assertContains("Test Title", content);
- assertContains("Test Artist", content);
- assertContains("Test Album", content);
- assertContains("2008", content);
- assertContains("Test Comment", content);
- assertContains("Rock", content);
-
- assertEquals("MPEG 3 Layer III Version 1", metadata.get("version"));
- assertEquals("44100", metadata.get("samplerate"));
- assertEquals("1", metadata.get("channels"));
- checkDuration(metadata, 2);
- }
-
- /**
- * Test that with only ID3v2 tags, of version 2.4, we get the full
- * set of information out.
- */
- @Test
- public void testMp3ParsingID3v24() throws Exception {
- Parser parser = new AutoDetectParser(); // Should auto-detect!
- ContentHandler handler = new BodyContentHandler();
- Metadata metadata = new Metadata();
-
- try (InputStream stream = Mp3ParserTest.class.getResourceAsStream(
- "/test-documents/testMP3id3v24.mp3")) {
- parser.parse(stream, handler, metadata, new ParseContext());
- }
-
- assertEquals("audio/mpeg", metadata.get(Metadata.CONTENT_TYPE));
- assertEquals("Test Title", metadata.get(TikaCoreProperties.TITLE));
- assertEquals("Test Artist", metadata.get(TikaCoreProperties.CREATOR));
- assertEquals("Test Artist", metadata.get(Metadata.AUTHOR));
-
- String content = handler.toString();
- assertContains("Test Title", content);
- assertContains("Test Artist", content);
- assertContains("Test Album", content);
- assertContains("2008", content);
- assertContains("Test Comment", content);
- assertContains("Rock", content);
- assertContains(", disc 1", content);
-
- assertEquals("MPEG 3 Layer III Version 1", metadata.get("version"));
- assertEquals("44100", metadata.get("samplerate"));
- assertEquals("1", metadata.get("channels"));
- checkDuration(metadata, 2);
-
- // Check XMPDM-typed audio properties
- assertEquals("Test Album", metadata.get(XMPDM.ALBUM));
- assertEquals("Test Artist", metadata.get(XMPDM.ARTIST));
- assertEquals("Test Album Artist", metadata.get(XMPDM.ALBUM_ARTIST));
- assertEquals(null, metadata.get(XMPDM.COMPOSER));
- assertEquals("2008", metadata.get(XMPDM.RELEASE_DATE));
- assertEquals("Rock", metadata.get(XMPDM.GENRE));
- assertEquals("1", metadata.get(XMPDM.COMPILATION));
-
- assertEquals(null, metadata.get(XMPDM.TRACK_NUMBER));
- assertEquals("1", metadata.get(XMPDM.DISC_NUMBER));
- }
-
- /**
- * Tests that a file with characters not in the ISO 8859-1
- * range is correctly handled
- */
- @Test
- public void testMp3ParsingID3i18n() throws Exception {
- Parser parser = new AutoDetectParser(); // Should auto-detect!
- ContentHandler handler = new BodyContentHandler();
- Metadata metadata = new Metadata();
-
- try (InputStream stream = Mp3ParserTest.class.getResourceAsStream(
- "/test-documents/testMP3i18n.mp3")) {
- parser.parse(stream, handler, metadata, new ParseContext());
- }
-
- assertEquals("audio/mpeg", metadata.get(Metadata.CONTENT_TYPE));
- assertEquals("Une chason en Fran\u00e7ais", metadata.get(TikaCoreProperties.TITLE));
- assertEquals("Test Artist \u2468\u2460", metadata.get(TikaCoreProperties.CREATOR));
- assertEquals("Test Artist \u2468\u2460", metadata.get(Metadata.AUTHOR));
- assertEquals("Test Artist \u2468\u2460", metadata.get(XMPDM.ARTIST));
- assertEquals("Test Album \u2460\u2468", metadata.get(XMPDM.ALBUM));
-
- assertEquals(
- "Eng - Comment Desc\nThis is a \u1357\u2468\u2460 Comment",
- metadata.get(XMPDM.LOG_COMMENT)
- );
-
- assertEquals("MPEG 3 Layer III Version 1", metadata.get("version"));
- assertEquals("44100", metadata.get("samplerate"));
- assertEquals("1", metadata.get("channels"));
- checkDuration(metadata, 2);
- }
-
-
- /**
- * Tests that a file with both lyrics and
- * ID3v2 tags gets both extracted correctly
- */
- @Test
- public void testMp3ParsingLyrics() throws Exception {
- Parser parser = new AutoDetectParser(); // Should auto-detect!
- ContentHandler handler = new BodyContentHandler();
- Metadata metadata = new Metadata();
-
- // Note - our test file has a lyrics tag, but lacks any
- // lyrics in the tags, so we can't test that bit
- // TODO Find a better sample file
-
- try (InputStream stream = Mp3ParserTest.class.getResourceAsStream(
- "/test-documents/testMP3lyrics.mp3")) {
- parser.parse(stream, handler, metadata, new ParseContext());
- }
-
- assertEquals("audio/mpeg", metadata.get(Metadata.CONTENT_TYPE));
- assertEquals("Test Title", metadata.get(TikaCoreProperties.TITLE));
- assertEquals("Test Artist", metadata.get(TikaCoreProperties.CREATOR));
- assertEquals("Test Artist", metadata.get(Metadata.AUTHOR));
-
- String content = handler.toString();
- assertContains("Test Title", content);
- assertContains("Test Artist", content);
- assertContains("Test Album", content);
- assertContains("2008", content);
- assertContains("Test Comment", content);
- assertContains("Rock", content);
-
- assertEquals("MPEG 3 Layer III Version 1", metadata.get("version"));
- assertEquals("44100", metadata.get("samplerate"));
- assertEquals("2", metadata.get("channels"));
- checkDuration(metadata, 1);
- }
-
- @Test
- public void testID3v2Frame() throws Exception {
- byte[] empty = new byte[] {
- 0x49, 0x44, 0x33, 3, 1, 0,
- 0, 0, 0, 0
- };
-
- assertEquals(11, ID3v2Frame.getInt(new byte[] {0,0,0,0x0b}));
- assertEquals(257, ID3v2Frame.getInt(new byte[] {0,0,1,1}));
-
- ID3v2Frame f = (ID3v2Frame)
- ID3v2Frame.createFrameIfPresent(new ByteArrayInputStream(empty));
- assertEquals(3, f.getMajorVersion());
- assertEquals(1, f.getMinorVersion());
- assertEquals(0, f.getFlags());
- assertEquals(0, f.getLength());
- assertEquals(0, f.getData().length);
-
- assertEquals("", ID3v2Frame.getTagString(f.getData(), 0, 0));
- assertEquals("", ID3v2Frame.getTagString(new byte[] {0,0,0,0}, 0, 3));
- assertEquals("A", ID3v2Frame.getTagString(new byte[] {(byte)'A',0,0,0}, 0, 3));
- }
-
- @Test
- public void testTIKA1589_noId3ReturnsDurationCorrectly() throws Exception {
- Parser parser = new AutoDetectParser(); // Should auto-detect!
- ContentHandler handler = new BodyContentHandler();
- Metadata metadata = new Metadata();
-
- try (InputStream stream = Mp3ParserTest.class.getResourceAsStream(
- "/test-documents/testMP3noid3.mp3")) {
- parser.parse(stream, handler, metadata, new ParseContext());
- }
-
- assertEquals("2455.510986328125", metadata.get(XMPDM.DURATION));
- }
-
- /**
- * This test will do nothing, unless you've downloaded the
- * mp3 file from TIKA-424 - the file cannot be
- * distributed with Tika.
- * This test will check for the complicated set of ID3v2.4
- * tags.
- */
- @Test
- public void testTIKA424() throws Exception {
- Parser parser = new AutoDetectParser(); // Should auto-detect!
- ContentHandler handler = new BodyContentHandler();
- Metadata metadata = new Metadata();
-
- try (InputStream stream = Mp3ParserTest.class.getResourceAsStream(
- "/test-documents/test2.mp3")) {
- if (stream == null) {
- // You haven't downloaded the file
- // Skip the test
- return;
- }
- parser.parse(stream, handler, metadata, new ParseContext());
- }
-
- assertEquals("audio/mpeg", metadata.get(Metadata.CONTENT_TYPE));
- assertEquals("Plus loin vers l'ouest", metadata.get(TikaCoreProperties.TITLE));
- assertEquals("Merzhin", metadata.get(TikaCoreProperties.CREATOR));
- assertEquals("Merzhin", metadata.get(Metadata.AUTHOR));
-
- String content = handler.toString();
- assertContains("Plus loin vers l'ouest", content);
-
- assertEquals("MPEG 3 Layer III Version 1", metadata.get("version"));
- assertEquals("44100", metadata.get("samplerate"));
- assertEquals("2", metadata.get("channels"));
- }
-
- /**
- * This tests that we can handle without errors (but perhaps not
- * all content) a file with a very very large ID3 frame that
- * has been truncated before the end of the ID3 tags.
- * In this case, it is a file with JPEG data in the ID3, which
- * is trunacted before the end of the JPEG bit of the ID3 frame.
- */
- @Test
- public void testTIKA474() throws Exception {
- Parser parser = new AutoDetectParser(); // Should auto-detect!
- ContentHandler handler = new BodyContentHandler();
- Metadata metadata = new Metadata();
-
- try (InputStream stream = Mp3ParserTest.class.getResourceAsStream(
- "/test-documents/testMP3truncated.mp3")) {
- parser.parse(stream, handler, metadata, new ParseContext());
- }
-
- // Check we could get the headers from the start
- assertEquals("audio/mpeg", metadata.get(Metadata.CONTENT_TYPE));
- assertEquals("Girl you have no faith in medicine", metadata.get(TikaCoreProperties.TITLE));
- assertEquals("The White Stripes", metadata.get(TikaCoreProperties.CREATOR));
- assertEquals("The White Stripes", metadata.get(Metadata.AUTHOR));
-
- String content = handler.toString();
- assertContains("Girl you have no faith in medicine", content);
- assertContains("The White Stripes", content);
- assertContains("Elephant", content);
- assertContains("2003", content);
-
- // File lacks any audio frames, so we can't know these
- assertEquals(null, metadata.get("version"));
- assertEquals(null, metadata.get("samplerate"));
- assertEquals(null, metadata.get("channels"));
- }
-
- // TIKA-1024
- @Test
- public void testNakedUTF16BOM() throws Exception {
- Parser parser = new AutoDetectParser(); // Should auto-detect!
- ContentHandler handler = new BodyContentHandler();
- Metadata metadata = new Metadata();
-
- try (InputStream stream = Mp3ParserTest.class.getResourceAsStream(
- "/test-documents/testNakedUTF16BOM.mp3")) {
- parser.parse(stream, handler, metadata, new ParseContext());
- }
- assertEquals("audio/mpeg", metadata.get(Metadata.CONTENT_TYPE));
- assertEquals("", metadata.get(XMPDM.GENRE));
- }
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.mp3;
+
+import static org.apache.tika.TikaTest.assertContains;
+import static org.junit.Assert.assertEquals;
+
+import java.io.ByteArrayInputStream;
+import java.io.InputStream;
+
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.TikaCoreProperties;
+import org.apache.tika.metadata.XMPDM;
+import org.apache.tika.parser.AutoDetectParser;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.parser.Parser;
+import org.apache.tika.sax.BodyContentHandler;
+import org.junit.Test;
+import org.xml.sax.ContentHandler;
+
+/**
+ * Test case for parsing mp3 files.
+ */
+public class Mp3ParserTest {
+
+ /**
+ * Checks the duration of an MP3 file.
+ * @param metadata the metadata object
+ * @param expected the expected duration, rounded as seconds
+ */
+ private static void checkDuration(Metadata metadata, int expected) {
+ assertEquals("Wrong duration", expected,
+ Math.round(Float.valueOf(metadata.get(XMPDM.DURATION)) / 1000));
+ }
+
+ /**
+ * Test that with only ID3v1 tags, we get some information out
+ */
+ @Test
+ public void testMp3ParsingID3v1() throws Exception {
+ Parser parser = new AutoDetectParser(); // Should auto-detect!
+ ContentHandler handler = new BodyContentHandler();
+ Metadata metadata = new Metadata();
+
+ try (InputStream stream = Mp3ParserTest.class.getResourceAsStream(
+ "/test-documents/testMP3id3v1.mp3")) {
+ parser.parse(stream, handler, metadata, new ParseContext());
+ }
+
+ assertEquals("audio/mpeg", metadata.get(Metadata.CONTENT_TYPE));
+ assertEquals("Test Title", metadata.get(TikaCoreProperties.TITLE));
+ assertEquals("Test Artist", metadata.get(TikaCoreProperties.CREATOR));
+ assertEquals("Test Artist", metadata.get(Metadata.AUTHOR));
+
+ String content = handler.toString();
+ assertContains("Test Title", content);
+ assertContains("Test Artist", content);
+ assertContains("Test Album", content);
+ assertContains("2008", content);
+ assertContains("Test Comment", content);
+ assertContains("Rock", content);
+
+ assertEquals("MPEG 3 Layer III Version 1", metadata.get("version"));
+ assertEquals("44100", metadata.get("samplerate"));
+ assertEquals("1", metadata.get("channels"));
+ checkDuration(metadata, 2);
+ }
+
+ /**
+ * Test that with only ID3v2 tags, we get the full
+ * set of information out.
+ */
+ @Test
+ public void testMp3ParsingID3v2() throws Exception {
+ Parser parser = new AutoDetectParser(); // Should auto-detect!
+ ContentHandler handler = new BodyContentHandler();
+ Metadata metadata = new Metadata();
+
+ try (InputStream stream = Mp3ParserTest.class.getResourceAsStream(
+ "/test-documents/testMP3id3v2.mp3")) {
+ parser.parse(stream, handler, metadata, new ParseContext());
+ }
+
+ // Check core properties
+ assertEquals("audio/mpeg", metadata.get(Metadata.CONTENT_TYPE));
+ assertEquals("Test Title", metadata.get(TikaCoreProperties.TITLE));
+ assertEquals("Test Artist", metadata.get(TikaCoreProperties.CREATOR));
+ assertEquals("Test Artist", metadata.get(Metadata.AUTHOR));
+
+ // Check the textual contents
+ String content = handler.toString();
+ assertContains("Test Title", content);
+ assertContains("Test Artist", content);
+ assertContains("Test Album", content);
+ assertContains("2008", content);
+ assertContains("Test Comment", content);
+ assertContains("Rock", content);
+ assertContains(", track 1", content);
+ assertContains(", disc 1", content);
+
+ // Check un-typed audio properties
+ assertEquals("MPEG 3 Layer III Version 1", metadata.get("version"));
+ assertEquals("44100", metadata.get("samplerate"));
+ assertEquals("1", metadata.get("channels"));
+
+ // Check XMPDM-typed audio properties
+ assertEquals("Test Album", metadata.get(XMPDM.ALBUM));
+ assertEquals("Test Artist", metadata.get(XMPDM.ARTIST));
+ assertEquals("Test Album Artist", metadata.get(XMPDM.ALBUM_ARTIST));
+ assertEquals(null, metadata.get(XMPDM.COMPOSER));
+ assertEquals("2008", metadata.get(XMPDM.RELEASE_DATE));
+ assertEquals("Rock", metadata.get(XMPDM.GENRE));
+ assertEquals("XXX - ID3v1 Comment\nTest Comment", metadata.get(XMPDM.LOG_COMMENT.getName()));
+ assertEquals("1", metadata.get(XMPDM.TRACK_NUMBER));
+ assertEquals("1/1", metadata.get(XMPDM.DISC_NUMBER));
+ assertEquals("1", metadata.get(XMPDM.COMPILATION));
+
+ assertEquals("44100", metadata.get(XMPDM.AUDIO_SAMPLE_RATE));
+ assertEquals("Mono", metadata.get(XMPDM.AUDIO_CHANNEL_TYPE));
+ assertEquals("MP3", metadata.get(XMPDM.AUDIO_COMPRESSOR));
+ checkDuration(metadata, 2);
+ }
+
+ /**
+ * Test that with both id3v2 and id3v1, we prefer the
+ * details from id3v2
+ */
+ @Test
+ public void testMp3ParsingID3v1v2() throws Exception {
+ Parser parser = new AutoDetectParser(); // Should auto-detect!
+ ContentHandler handler = new BodyContentHandler();
+ Metadata metadata = new Metadata();
+
+ try (InputStream stream = Mp3ParserTest.class.getResourceAsStream(
+ "/test-documents/testMP3id3v1_v2.mp3")) {
+ parser.parse(stream, handler, metadata, new ParseContext());
+ }
+
+ assertEquals("audio/mpeg", metadata.get(Metadata.CONTENT_TYPE));
+ assertEquals("Test Title", metadata.get(TikaCoreProperties.TITLE));
+ assertEquals("Test Artist", metadata.get(TikaCoreProperties.CREATOR));
+ assertEquals("Test Artist", metadata.get(Metadata.AUTHOR));
+
+ String content = handler.toString();
+ assertContains("Test Title", content);
+ assertContains("Test Artist", content);
+ assertContains("Test Album", content);
+ assertContains("2008", content);
+ assertContains("Test Comment", content);
+ assertContains("Rock", content);
+
+ assertEquals("MPEG 3 Layer III Version 1", metadata.get("version"));
+ assertEquals("44100", metadata.get("samplerate"));
+ assertEquals("1", metadata.get("channels"));
+ checkDuration(metadata, 2);
+ }
+
+ /**
+ * Test that with only ID3v2 tags, of version 2.4, we get the full
+ * set of information out.
+ */
+ @Test
+ public void testMp3ParsingID3v24() throws Exception {
+ Parser parser = new AutoDetectParser(); // Should auto-detect!
+ ContentHandler handler = new BodyContentHandler();
+ Metadata metadata = new Metadata();
+
+ try (InputStream stream = Mp3ParserTest.class.getResourceAsStream(
+ "/test-documents/testMP3id3v24.mp3")) {
+ parser.parse(stream, handler, metadata, new ParseContext());
+ }
+
+ assertEquals("audio/mpeg", metadata.get(Metadata.CONTENT_TYPE));
+ assertEquals("Test Title", metadata.get(TikaCoreProperties.TITLE));
+ assertEquals("Test Artist", metadata.get(TikaCoreProperties.CREATOR));
+ assertEquals("Test Artist", metadata.get(Metadata.AUTHOR));
+
+ String content = handler.toString();
+ assertContains("Test Title", content);
+ assertContains("Test Artist", content);
+ assertContains("Test Album", content);
+ assertContains("2008", content);
+ assertContains("Test Comment", content);
+ assertContains("Rock", content);
+ assertContains(", disc 1", content);
+
+ assertEquals("MPEG 3 Layer III Version 1", metadata.get("version"));
+ assertEquals("44100", metadata.get("samplerate"));
+ assertEquals("1", metadata.get("channels"));
+ checkDuration(metadata, 2);
+
+ // Check XMPDM-typed audio properties
+ assertEquals("Test Album", metadata.get(XMPDM.ALBUM));
+ assertEquals("Test Artist", metadata.get(XMPDM.ARTIST));
+ assertEquals("Test Album Artist", metadata.get(XMPDM.ALBUM_ARTIST));
+ assertEquals(null, metadata.get(XMPDM.COMPOSER));
+ assertEquals("2008", metadata.get(XMPDM.RELEASE_DATE));
+ assertEquals("Rock", metadata.get(XMPDM.GENRE));
+ assertEquals("1", metadata.get(XMPDM.COMPILATION));
+
+ assertEquals(null, metadata.get(XMPDM.TRACK_NUMBER));
+ assertEquals("1", metadata.get(XMPDM.DISC_NUMBER));
+ }
+
+ /**
+ * Tests that a file with characters not in the ISO 8859-1
+ * range is correctly handled
+ */
+ @Test
+ public void testMp3ParsingID3i18n() throws Exception {
+ Parser parser = new AutoDetectParser(); // Should auto-detect!
+ ContentHandler handler = new BodyContentHandler();
+ Metadata metadata = new Metadata();
+
+ try (InputStream stream = Mp3ParserTest.class.getResourceAsStream(
+ "/test-documents/testMP3i18n.mp3")) {
+ parser.parse(stream, handler, metadata, new ParseContext());
+ }
+
+ assertEquals("audio/mpeg", metadata.get(Metadata.CONTENT_TYPE));
+ assertEquals("Une chason en Fran\u00e7ais", metadata.get(TikaCoreProperties.TITLE));
+ assertEquals("Test Artist \u2468\u2460", metadata.get(TikaCoreProperties.CREATOR));
+ assertEquals("Test Artist \u2468\u2460", metadata.get(Metadata.AUTHOR));
+ assertEquals("Test Artist \u2468\u2460", metadata.get(XMPDM.ARTIST));
+ assertEquals("Test Album \u2460\u2468", metadata.get(XMPDM.ALBUM));
+
+ assertEquals(
+ "Eng - Comment Desc\nThis is a \u1357\u2468\u2460 Comment",
+ metadata.get(XMPDM.LOG_COMMENT)
+ );
+
+ assertEquals("MPEG 3 Layer III Version 1", metadata.get("version"));
+ assertEquals("44100", metadata.get("samplerate"));
+ assertEquals("1", metadata.get("channels"));
+ checkDuration(metadata, 2);
+ }
+
+
+ /**
+ * Tests that a file with both lyrics and
+ * ID3v2 tags gets both extracted correctly
+ */
+ @Test
+ public void testMp3ParsingLyrics() throws Exception {
+ Parser parser = new AutoDetectParser(); // Should auto-detect!
+ ContentHandler handler = new BodyContentHandler();
+ Metadata metadata = new Metadata();
+
+ // Note - our test file has a lyrics tag, but lacks any
+ // lyrics in the tags, so we can't test that bit
+ // TODO Find a better sample file
+
+ try (InputStream stream = Mp3ParserTest.class.getResourceAsStream(
+ "/test-documents/testMP3lyrics.mp3")) {
+ parser.parse(stream, handler, metadata, new ParseContext());
+ }
+
+ assertEquals("audio/mpeg", metadata.get(Metadata.CONTENT_TYPE));
+ assertEquals("Test Title", metadata.get(TikaCoreProperties.TITLE));
+ assertEquals("Test Artist", metadata.get(TikaCoreProperties.CREATOR));
+ assertEquals("Test Artist", metadata.get(Metadata.AUTHOR));
+
+ String content = handler.toString();
+ assertContains("Test Title", content);
+ assertContains("Test Artist", content);
+ assertContains("Test Album", content);
+ assertContains("2008", content);
+ assertContains("Test Comment", content);
+ assertContains("Rock", content);
+
+ assertEquals("MPEG 3 Layer III Version 1", metadata.get("version"));
+ assertEquals("44100", metadata.get("samplerate"));
+ assertEquals("2", metadata.get("channels"));
+ checkDuration(metadata, 1);
+ }
+
+ @Test
+ public void testID3v2Frame() throws Exception {
+ byte[] empty = new byte[] {
+ 0x49, 0x44, 0x33, 3, 1, 0,
+ 0, 0, 0, 0
+ };
+
+ assertEquals(11, ID3v2Frame.getInt(new byte[] {0,0,0,0x0b}));
+ assertEquals(257, ID3v2Frame.getInt(new byte[] {0,0,1,1}));
+
+ ID3v2Frame f = (ID3v2Frame)
+ ID3v2Frame.createFrameIfPresent(new ByteArrayInputStream(empty));
+ assertEquals(3, f.getMajorVersion());
+ assertEquals(1, f.getMinorVersion());
+ assertEquals(0, f.getFlags());
+ assertEquals(0, f.getLength());
+ assertEquals(0, f.getData().length);
+
+ assertEquals("", ID3v2Frame.getTagString(f.getData(), 0, 0));
+ assertEquals("", ID3v2Frame.getTagString(new byte[] {0,0,0,0}, 0, 3));
+ assertEquals("A", ID3v2Frame.getTagString(new byte[] {(byte)'A',0,0,0}, 0, 3));
+ }
+
+ @Test
+ public void testTIKA1589_noId3ReturnsDurationCorrectly() throws Exception {
+ Parser parser = new AutoDetectParser(); // Should auto-detect!
+ ContentHandler handler = new BodyContentHandler();
+ Metadata metadata = new Metadata();
+
+ try (InputStream stream = Mp3ParserTest.class.getResourceAsStream(
+ "/test-documents/testMP3noid3.mp3")) {
+ parser.parse(stream, handler, metadata, new ParseContext());
+ }
+
+ assertEquals("2455.510986328125", metadata.get(XMPDM.DURATION));
+ }
+
+ /**
+ * This test will do nothing, unless you've downloaded the
+ * mp3 file from TIKA-424 - the file cannot be
+ * distributed with Tika.
+ * This test will check for the complicated set of ID3v2.4
+ * tags.
+ */
+ @Test
+ public void testTIKA424() throws Exception {
+ Parser parser = new AutoDetectParser(); // Should auto-detect!
+ ContentHandler handler = new BodyContentHandler();
+ Metadata metadata = new Metadata();
+
+ try (InputStream stream = Mp3ParserTest.class.getResourceAsStream(
+ "/test-documents/test2.mp3")) {
+ if (stream == null) {
+ // You haven't downloaded the file
+ // Skip the test
+ return;
+ }
+ parser.parse(stream, handler, metadata, new ParseContext());
+ }
+
+ assertEquals("audio/mpeg", metadata.get(Metadata.CONTENT_TYPE));
+ assertEquals("Plus loin vers l'ouest", metadata.get(TikaCoreProperties.TITLE));
+ assertEquals("Merzhin", metadata.get(TikaCoreProperties.CREATOR));
+ assertEquals("Merzhin", metadata.get(Metadata.AUTHOR));
+
+ String content = handler.toString();
+ assertContains("Plus loin vers l'ouest", content);
+
+ assertEquals("MPEG 3 Layer III Version 1", metadata.get("version"));
+ assertEquals("44100", metadata.get("samplerate"));
+ assertEquals("2", metadata.get("channels"));
+ }
+
+ /**
+ * This tests that we can handle without errors (but perhaps not
+ * all content) a file with a very very large ID3 frame that
+ * has been truncated before the end of the ID3 tags.
+ * In this case, it is a file with JPEG data in the ID3, which
+ * is trunacted before the end of the JPEG bit of the ID3 frame.
+ */
+ @Test
+ public void testTIKA474() throws Exception {
+ Parser parser = new AutoDetectParser(); // Should auto-detect!
+ ContentHandler handler = new BodyContentHandler();
+ Metadata metadata = new Metadata();
+
+ try (InputStream stream = Mp3ParserTest.class.getResourceAsStream(
+ "/test-documents/testMP3truncated.mp3")) {
+ parser.parse(stream, handler, metadata, new ParseContext());
+ }
+
+ // Check we could get the headers from the start
+ assertEquals("audio/mpeg", metadata.get(Metadata.CONTENT_TYPE));
+ assertEquals("Girl you have no faith in medicine", metadata.get(TikaCoreProperties.TITLE));
+ assertEquals("The White Stripes", metadata.get(TikaCoreProperties.CREATOR));
+ assertEquals("The White Stripes", metadata.get(Metadata.AUTHOR));
+
+ String content = handler.toString();
+ assertContains("Girl you have no faith in medicine", content);
+ assertContains("The White Stripes", content);
+ assertContains("Elephant", content);
+ assertContains("2003", content);
+
+ // File lacks any audio frames, so we can't know these
+ assertEquals(null, metadata.get("version"));
+ assertEquals(null, metadata.get("samplerate"));
+ assertEquals(null, metadata.get("channels"));
+ }
+
+ // TIKA-1024
+ @Test
+ public void testNakedUTF16BOM() throws Exception {
+ Parser parser = new AutoDetectParser(); // Should auto-detect!
+ ContentHandler handler = new BodyContentHandler();
+ Metadata metadata = new Metadata();
+
+ try (InputStream stream = Mp3ParserTest.class.getResourceAsStream(
+ "/test-documents/testNakedUTF16BOM.mp3")) {
+ parser.parse(stream, handler, metadata, new ParseContext());
+ }
+ assertEquals("audio/mpeg", metadata.get(Metadata.CONTENT_TYPE));
+ assertEquals("", metadata.get(XMPDM.GENRE));
+ }
+}
http://git-wip-us.apache.org/repos/asf/tika/blob/c7a6bcac/tika-parser-modules/tika-parser-multimedia-module/src/test/java/org/apache/tika/parser/ocr/TesseractOCRConfigTest.java
----------------------------------------------------------------------
diff --git a/tika-parser-modules/tika-parser-multimedia-module/src/test/java/org/apache/tika/parser/ocr/TesseractOCRConfigTest.java b/tika-parser-modules/tika-parser-multimedia-module/src/test/java/org/apache/tika/parser/ocr/TesseractOCRConfigTest.java
index 36c0efe..aeaf71e 100644
--- a/tika-parser-modules/tika-parser-multimedia-module/src/test/java/org/apache/tika/parser/ocr/TesseractOCRConfigTest.java
+++ b/tika-parser-modules/tika-parser-multimedia-module/src/test/java/org/apache/tika/parser/ocr/TesseractOCRConfigTest.java
@@ -1,92 +1,92 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.parser.ocr;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertTrue;
-
-import java.io.File;
-import java.io.InputStream;
-
-import org.apache.tika.TikaTest;
-import org.junit.Test;
-
-public class TesseractOCRConfigTest extends TikaTest {
-
- @Test
- public void testNoConfig() throws Exception {
- TesseractOCRConfig config = new TesseractOCRConfig();
- assertEquals("Invalid default tesseractPath value", "", config.getTesseractPath());
- assertEquals("Invalid default tessdataPath value", "", config.getTessdataPath());
- assertEquals("Invalid default language value", "eng", config.getLanguage());
- assertEquals("Invalid default pageSegMode value", "1", config.getPageSegMode());
- assertEquals("Invalid default minFileSizeToOcr value", 0, config.getMinFileSizeToOcr());
- assertEquals("Invalid default maxFileSizeToOcr value", Integer.MAX_VALUE, config.getMaxFileSizeToOcr());
- assertEquals("Invalid default timeout value", 120, config.getTimeout());
- }
-
- @Test
- public void testPartialConfig() throws Exception {
-
- InputStream stream = TesseractOCRConfigTest.class.getResourceAsStream(
- "/test-properties/TesseractOCRConfig-partial.properties");
-
- TesseractOCRConfig config = new TesseractOCRConfig(stream);
- assertEquals("Invalid default tesseractPath value", "", config.getTesseractPath());
- assertEquals("Invalid default tessdataPath value", "", config.getTessdataPath());
- assertEquals("Invalid overridden language value", "fra+deu", config.getLanguage());
- assertEquals("Invalid default pageSegMode value", "1", config.getPageSegMode());
- assertEquals("Invalid overridden minFileSizeToOcr value", 1, config.getMinFileSizeToOcr());
- assertEquals("Invalid default maxFileSizeToOcr value", Integer.MAX_VALUE, config.getMaxFileSizeToOcr());
- assertEquals("Invalid overridden timeout value", 240, config.getTimeout());
- }
-
- @Test
- public void testFullConfig() throws Exception {
-
- InputStream stream = TesseractOCRConfigTest.class.getResourceAsStream(
- "/test-properties/TesseractOCRConfig-full.properties");
-
- TesseractOCRConfig config = new TesseractOCRConfig(stream);
- assertEquals("Invalid overridden tesseractPath value", "/opt/tesseract" + File.separator, config.getTesseractPath());
- assertEquals("Invalid overridden tesseractPath value", "/usr/local/share" + File.separator, config.getTessdataPath());
- assertEquals("Invalid overridden language value", "fra+deu", config.getLanguage());
- assertEquals("Invalid overridden pageSegMode value", "2", config.getPageSegMode());
- assertEquals("Invalid overridden minFileSizeToOcr value", 1, config.getMinFileSizeToOcr());
- assertEquals("Invalid overridden maxFileSizeToOcr value", 2000000, config.getMaxFileSizeToOcr());
- assertEquals("Invalid overridden timeout value", 240, config.getTimeout());
- }
-
- @Test(expected=IllegalArgumentException.class)
- public void testValidateLanguage() {
- TesseractOCRConfig config = new TesseractOCRConfig();
- config.setLanguage("eng");
- config.setLanguage("eng+fra");
- assertTrue("Couldn't set valid values", true);
- config.setLanguage("rm -Rf *");
- }
-
- @Test(expected=IllegalArgumentException.class)
- public void testValidatePageSegMode() {
- TesseractOCRConfig config = new TesseractOCRConfig();
- config.setPageSegMode("0");
- config.setPageSegMode("10");
- assertTrue("Couldn't set valid values", true);
- config.setPageSegMode("11");
- }
-
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.ocr;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import java.io.File;
+import java.io.InputStream;
+
+import org.apache.tika.TikaTest;
+import org.junit.Test;
+
+public class TesseractOCRConfigTest extends TikaTest {
+
+ @Test
+ public void testNoConfig() throws Exception {
+ TesseractOCRConfig config = new TesseractOCRConfig();
+ assertEquals("Invalid default tesseractPath value", "", config.getTesseractPath());
+ assertEquals("Invalid default tessdataPath value", "", config.getTessdataPath());
+ assertEquals("Invalid default language value", "eng", config.getLanguage());
+ assertEquals("Invalid default pageSegMode value", "1", config.getPageSegMode());
+ assertEquals("Invalid default minFileSizeToOcr value", 0, config.getMinFileSizeToOcr());
+ assertEquals("Invalid default maxFileSizeToOcr value", Integer.MAX_VALUE, config.getMaxFileSizeToOcr());
+ assertEquals("Invalid default timeout value", 120, config.getTimeout());
+ }
+
+ @Test
+ public void testPartialConfig() throws Exception {
+
+ InputStream stream = TesseractOCRConfigTest.class.getResourceAsStream(
+ "/test-properties/TesseractOCRConfig-partial.properties");
+
+ TesseractOCRConfig config = new TesseractOCRConfig(stream);
+ assertEquals("Invalid default tesseractPath value", "", config.getTesseractPath());
+ assertEquals("Invalid default tessdataPath value", "", config.getTessdataPath());
+ assertEquals("Invalid overridden language value", "fra+deu", config.getLanguage());
+ assertEquals("Invalid default pageSegMode value", "1", config.getPageSegMode());
+ assertEquals("Invalid overridden minFileSizeToOcr value", 1, config.getMinFileSizeToOcr());
+ assertEquals("Invalid default maxFileSizeToOcr value", Integer.MAX_VALUE, config.getMaxFileSizeToOcr());
+ assertEquals("Invalid overridden timeout value", 240, config.getTimeout());
+ }
+
+ @Test
+ public void testFullConfig() throws Exception {
+
+ InputStream stream = TesseractOCRConfigTest.class.getResourceAsStream(
+ "/test-properties/TesseractOCRConfig-full.properties");
+
+ TesseractOCRConfig config = new TesseractOCRConfig(stream);
+ assertEquals("Invalid overridden tesseractPath value", "/opt/tesseract" + File.separator, config.getTesseractPath());
+ assertEquals("Invalid overridden tesseractPath value", "/usr/local/share" + File.separator, config.getTessdataPath());
+ assertEquals("Invalid overridden language value", "fra+deu", config.getLanguage());
+ assertEquals("Invalid overridden pageSegMode value", "2", config.getPageSegMode());
+ assertEquals("Invalid overridden minFileSizeToOcr value", 1, config.getMinFileSizeToOcr());
+ assertEquals("Invalid overridden maxFileSizeToOcr value", 2000000, config.getMaxFileSizeToOcr());
+ assertEquals("Invalid overridden timeout value", 240, config.getTimeout());
+ }
+
+ @Test(expected=IllegalArgumentException.class)
+ public void testValidateLanguage() {
+ TesseractOCRConfig config = new TesseractOCRConfig();
+ config.setLanguage("eng");
+ config.setLanguage("eng+fra");
+ assertTrue("Couldn't set valid values", true);
+ config.setLanguage("rm -Rf *");
+ }
+
+ @Test(expected=IllegalArgumentException.class)
+ public void testValidatePageSegMode() {
+ TesseractOCRConfig config = new TesseractOCRConfig();
+ config.setPageSegMode("0");
+ config.setPageSegMode("10");
+ assertTrue("Couldn't set valid values", true);
+ config.setPageSegMode("11");
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/tika/blob/c7a6bcac/tika-parser-modules/tika-parser-multimedia-module/src/test/java/org/apache/tika/parser/video/FLVParserTest.java
----------------------------------------------------------------------
diff --git a/tika-parser-modules/tika-parser-multimedia-module/src/test/java/org/apache/tika/parser/video/FLVParserTest.java b/tika-parser-modules/tika-parser-multimedia-module/src/test/java/org/apache/tika/parser/video/FLVParserTest.java
index 147113e..d3a876e 100644
--- a/tika-parser-modules/tika-parser-multimedia-module/src/test/java/org/apache/tika/parser/video/FLVParserTest.java
+++ b/tika-parser-modules/tika-parser-multimedia-module/src/test/java/org/apache/tika/parser/video/FLVParserTest.java
@@ -1,44 +1,44 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.parser.video;
-
-import static org.junit.Assert.assertEquals;
-
-import org.apache.tika.Tika;
-import org.apache.tika.metadata.Metadata;
-import org.junit.Test;
-
-public class FLVParserTest {
-
- @Test
- public void testFLV() throws Exception {
- String path = "/test-documents/testFLV.flv";
- Metadata metadata = new Metadata();
-
- String content = new Tika().parseToString(
- FLVParserTest.class.getResourceAsStream(path), metadata);
-
- assertEquals("", content);
- assertEquals("video/x-flv", metadata.get(Metadata.CONTENT_TYPE));
- assertEquals("true", metadata.get("hasVideo"));
- assertEquals("false", metadata.get("stereo"));
- assertEquals("true", metadata.get("hasAudio"));
- assertEquals("120.0", metadata.get("height"));
- assertEquals("16.0", metadata.get("audiosamplesize"));
- }
-
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.video;
+
+import static org.junit.Assert.assertEquals;
+
+import org.apache.tika.Tika;
+import org.apache.tika.metadata.Metadata;
+import org.junit.Test;
+
+public class FLVParserTest {
+
+ @Test
+ public void testFLV() throws Exception {
+ String path = "/test-documents/testFLV.flv";
+ Metadata metadata = new Metadata();
+
+ String content = new Tika().parseToString(
+ FLVParserTest.class.getResourceAsStream(path), metadata);
+
+ assertEquals("", content);
+ assertEquals("video/x-flv", metadata.get(Metadata.CONTENT_TYPE));
+ assertEquals("true", metadata.get("hasVideo"));
+ assertEquals("false", metadata.get("stereo"));
+ assertEquals("true", metadata.get("hasAudio"));
+ assertEquals("120.0", metadata.get("height"));
+ assertEquals("16.0", metadata.get("audiosamplesize"));
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/tika/blob/c7a6bcac/tika-parser-modules/tika-parser-office-module/pom.xml
----------------------------------------------------------------------
diff --git a/tika-parser-modules/tika-parser-office-module/pom.xml b/tika-parser-modules/tika-parser-office-module/pom.xml
index 4756328..4825076 100644
--- a/tika-parser-modules/tika-parser-office-module/pom.xml
+++ b/tika-parser-modules/tika-parser-office-module/pom.xml
@@ -1,126 +1,126 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor
- license agreements. See the NOTICE file distributed with this work for additional
- information regarding copyright ownership. The ASF licenses this file to
- you under the Apache License, Version 2.0 (the "License"); you may not use
- this file except in compliance with the License. You may obtain a copy of
- the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required
- by applicable law or agreed to in writing, software distributed under the
- License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
- OF ANY KIND, either express or implied. See the License for the specific
- language governing permissions and limitations under the License. -->
-<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
- xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
- <modelVersion>4.0.0</modelVersion>
-
- <parent>
- <groupId>org.apache.tika</groupId>
- <artifactId>tika-parser-modules</artifactId>
- <version>2.0-SNAPSHOT</version>
- </parent>
-
- <artifactId>tika-parser-office-module</artifactId>
- <name>Apache Tika parser office module</name>
- <url>http://tika.apache.org/</url>
-
- <dependencies>
- <dependency>
- <groupId>${project.groupId}</groupId>
- <artifactId>tika-core</artifactId>
- <version>${project.version}</version>
- </dependency>
- <dependency>
- <groupId>commons-io</groupId>
- <artifactId>commons-io</artifactId>
- <version>${commons.io.version}</version>
- </dependency>
- <dependency>
- <groupId>org.apache.poi</groupId>
- <artifactId>poi</artifactId>
- <version>${poi.version}</version>
- </dependency>
- <dependency>
- <groupId>org.apache.poi</groupId>
- <artifactId>poi-scratchpad</artifactId>
- <version>${poi.version}</version>
- </dependency>
- <dependency>
- <groupId>org.apache.poi</groupId>
- <artifactId>poi-ooxml</artifactId>
- <version>${poi.version}</version>
- <exclusions>
- <exclusion>
- <groupId>stax</groupId>
- <artifactId>stax-api</artifactId>
- </exclusion>
- <exclusion>
- <groupId>xml-apis</groupId>
- <artifactId>xml-apis</artifactId>
- </exclusion>
- </exclusions>
- </dependency>
- <dependency>
- <groupId>com.healthmarketscience.jackcess</groupId>
- <artifactId>jackcess</artifactId>
- <version>2.1.3</version>
- </dependency>
- <dependency>
- <groupId>com.healthmarketscience.jackcess</groupId>
- <artifactId>jackcess-encrypt</artifactId>
- <version>2.1.1</version>
- <exclusions>
- <exclusion>
- <groupId>org.bouncycastle</groupId>
- <artifactId>bcprov-jdk15on</artifactId>
- </exclusion>
- </exclusions>
- </dependency>
- <!-- PDFBox (in the tika-parser-pdf-module) and poi's ooxml
- code relies on bouncy castle, as does jackcess-encrypt
- Need to exclude the older library and include the newer one
- if there is a conflict.
- -->
- <dependency>
- <groupId>org.bouncycastle</groupId>
- <artifactId>bcprov-jdk15on</artifactId>
- <version>${bouncycastle.version}</version>
- </dependency>
- <dependency>
- <groupId>com.pff</groupId>
- <artifactId>java-libpst</artifactId>
- <version>0.8.1</version>
- </dependency>
- <dependency>
- <groupId>${project.groupId}</groupId>
- <artifactId>tika-parser-package-module</artifactId>
- <version>${project.version}</version>
- <scope>test</scope>
- </dependency>
- <dependency>
- <groupId>${project.groupId}</groupId>
- <artifactId>tika-parser-web-module</artifactId>
- <version>${project.version}</version>
- <scope>test</scope>
- </dependency>
- <dependency>
- <groupId>${project.groupId}</groupId>
- <artifactId>tika-parser-text-module</artifactId>
- <version>${project.version}</version>
- </dependency>
- <dependency>
- <groupId>org.slf4j</groupId>
- <artifactId>slf4j-log4j12</artifactId>
- <scope>test</scope>
- </dependency>
- </dependencies>
-
- <build>
- <plugins>
- <plugin>
- <groupId>org.apache.maven.plugins</groupId>
- <artifactId>maven-dependency-plugin</artifactId>
- </plugin>
- </plugins>
- </build>
-
+<?xml version="1.0" encoding="UTF-8"?>
+<!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor
+ license agreements. See the NOTICE file distributed with this work for additional
+ information regarding copyright ownership. The ASF licenses this file to
+ you under the Apache License, Version 2.0 (the "License"); you may not use
+ this file except in compliance with the License. You may obtain a copy of
+ the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required
+ by applicable law or agreed to in writing, software distributed under the
+ License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
+ OF ANY KIND, either express or implied. See the License for the specific
+ language governing permissions and limitations under the License. -->
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+ <modelVersion>4.0.0</modelVersion>
+
+ <parent>
+ <groupId>org.apache.tika</groupId>
+ <artifactId>tika-parser-modules</artifactId>
+ <version>2.0-SNAPSHOT</version>
+ </parent>
+
+ <artifactId>tika-parser-office-module</artifactId>
+ <name>Apache Tika parser office module</name>
+ <url>http://tika.apache.org/</url>
+
+ <dependencies>
+ <dependency>
+ <groupId>${project.groupId}</groupId>
+ <artifactId>tika-core</artifactId>
+ <version>${project.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>commons-io</groupId>
+ <artifactId>commons-io</artifactId>
+ <version>${commons.io.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.poi</groupId>
+ <artifactId>poi</artifactId>
+ <version>${poi.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.poi</groupId>
+ <artifactId>poi-scratchpad</artifactId>
+ <version>${poi.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.poi</groupId>
+ <artifactId>poi-ooxml</artifactId>
+ <version>${poi.version}</version>
+ <exclusions>
+ <exclusion>
+ <groupId>stax</groupId>
+ <artifactId>stax-api</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>xml-apis</groupId>
+ <artifactId>xml-apis</artifactId>
+ </exclusion>
+ </exclusions>
+ </dependency>
+ <dependency>
+ <groupId>com.healthmarketscience.jackcess</groupId>
+ <artifactId>jackcess</artifactId>
+ <version>2.1.3</version>
+ </dependency>
+ <dependency>
+ <groupId>com.healthmarketscience.jackcess</groupId>
+ <artifactId>jackcess-encrypt</artifactId>
+ <version>2.1.1</version>
+ <exclusions>
+ <exclusion>
+ <groupId>org.bouncycastle</groupId>
+ <artifactId>bcprov-jdk15on</artifactId>
+ </exclusion>
+ </exclusions>
+ </dependency>
+ <!-- PDFBox (in the tika-parser-pdf-module) and poi's ooxml
+ code relies on bouncy castle, as does jackcess-encrypt
+ Need to exclude the older library and include the newer one
+ if there is a conflict.
+ -->
+ <dependency>
+ <groupId>org.bouncycastle</groupId>
+ <artifactId>bcprov-jdk15on</artifactId>
+ <version>${bouncycastle.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>com.pff</groupId>
+ <artifactId>java-libpst</artifactId>
+ <version>0.8.1</version>
+ </dependency>
+ <dependency>
+ <groupId>${project.groupId}</groupId>
+ <artifactId>tika-parser-package-module</artifactId>
+ <version>${project.version}</version>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>${project.groupId}</groupId>
+ <artifactId>tika-parser-web-module</artifactId>
+ <version>${project.version}</version>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>${project.groupId}</groupId>
+ <artifactId>tika-parser-text-module</artifactId>
+ <version>${project.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>org.slf4j</groupId>
+ <artifactId>slf4j-log4j12</artifactId>
+ <scope>test</scope>
+ </dependency>
+ </dependencies>
+
+ <build>
+ <plugins>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-dependency-plugin</artifactId>
+ </plugin>
+ </plugins>
+ </build>
+
</project>
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/tika/blob/c7a6bcac/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/module/office/internal/Activator.java
----------------------------------------------------------------------
diff --git a/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/module/office/internal/Activator.java b/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/module/office/internal/Activator.java
index 32a41ab..8f34381 100644
--- a/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/module/office/internal/Activator.java
+++ b/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/module/office/internal/Activator.java
@@ -1,36 +1,36 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.module.office.internal;
-
-import org.apache.tika.osgi.TikaAbstractBundleActivator;
-import org.osgi.framework.BundleContext;
-
-public class Activator extends TikaAbstractBundleActivator {
-
- @Override
- public void start(BundleContext context) throws Exception {
-
- registerTikaParserServiceLoader(context, Activator.class.getClassLoader());
-
- }
-
- @Override
- public void stop(BundleContext context) throws Exception {
-
- }
-
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.module.office.internal;
+
+import org.apache.tika.osgi.TikaAbstractBundleActivator;
+import org.osgi.framework.BundleContext;
+
+public class Activator extends TikaAbstractBundleActivator {
+
+ @Override
+ public void start(BundleContext context) throws Exception {
+
+ registerTikaParserServiceLoader(context, Activator.class.getClassLoader());
+
+ }
+
+ @Override
+ public void stop(BundleContext context) throws Exception {
+
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/tika/blob/c7a6bcac/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/chm/ChmParser.java
----------------------------------------------------------------------
diff --git a/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/chm/ChmParser.java b/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/chm/ChmParser.java
index c3e85c1..94c5aa5 100644
--- a/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/chm/ChmParser.java
+++ b/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/chm/ChmParser.java
@@ -1,112 +1,112 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.parser.chm;
-
-import java.io.ByteArrayInputStream;
-import java.io.IOException;
-import java.io.InputStream;
-import java.util.Arrays;
-import java.util.Collections;
-import java.util.HashSet;
-import java.util.Set;
-
-import org.apache.tika.exception.TikaException;
-import org.apache.tika.metadata.Metadata;
-import org.apache.tika.mime.MediaType;
-import org.apache.tika.parser.AbstractParser;
-import org.apache.tika.parser.ParseContext;
-import org.apache.tika.parser.Parser;
-import org.apache.tika.parser.ParserProxy;
-import org.apache.tika.parser.chm.accessor.DirectoryListingEntry;
-import org.apache.tika.parser.chm.core.ChmExtractor;
-import org.apache.tika.sax.BodyContentHandler;
-import org.apache.tika.sax.EmbeddedContentHandler;
-import org.apache.tika.sax.XHTMLContentHandler;
-import org.xml.sax.ContentHandler;
-import org.xml.sax.SAXException;
-
-public class ChmParser extends AbstractParser {
-
- /** Serial version UID */
- private static final long serialVersionUID = 5938777307516469802L;
-
- private static final Set<MediaType> SUPPORTED_TYPES =
- Collections.unmodifiableSet(new HashSet<MediaType>(Arrays.asList(
- MediaType.application("vnd.ms-htmlhelp"),
- MediaType.application("chm"),
- MediaType.application("x-chm"))));
-
- private final Parser htmlProxy;
-
- public ChmParser() {
- this.htmlProxy = createParserProxy("org.apache.tika.parser.html.HtmlParser");
- }
- @Override
- public Set<MediaType> getSupportedTypes(ParseContext context) {
- return SUPPORTED_TYPES;
- }
-
- @Override
- public void parse(InputStream stream, ContentHandler handler,
- Metadata metadata, ParseContext context) throws IOException,
- SAXException, TikaException {
- ChmExtractor chmExtractor = new ChmExtractor(stream);
-
- // metadata
- metadata.set(Metadata.CONTENT_TYPE, "application/vnd.ms-htmlhelp");
-
- // content
- XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
- xhtml.startDocument();
-
- for (DirectoryListingEntry entry : chmExtractor.getChmDirList().getDirectoryListingEntryList()) {
- final String entryName = entry.getName();
- if (entryName.endsWith(".html")
- || entryName.endsWith(".htm")
- ) {
-// AttributesImpl attrs = new AttributesImpl();
-// attrs.addAttribute("", "name", "name", "String", entryName);
-// xhtml.startElement("", "document", "document", attrs);
-
- byte[] data = chmExtractor.extractChmEntry(entry);
-
- parsePage(data, xhtml);
-
-// xhtml.endElement("", "", "document");
- }
- }
-
- xhtml.endDocument();
- }
-
-
- private void parsePage(byte[] byteObject, ContentHandler xhtml) throws TikaException {// throws IOException
- InputStream stream = null;
- Metadata metadata = new Metadata();
- ContentHandler handler = new EmbeddedContentHandler(new BodyContentHandler(xhtml));// -1
- ParseContext parser = new ParseContext();
- try {
- stream = new ByteArrayInputStream(byteObject);
- htmlProxy.parse(stream, handler, metadata, parser);
- } catch (SAXException e) {
- throw new RuntimeException(e);
- } catch (IOException e) {
- // Pushback overflow from tagsoup
- }
- }
-
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.chm;
+
+import java.io.ByteArrayInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.Set;
+
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.mime.MediaType;
+import org.apache.tika.parser.AbstractParser;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.parser.Parser;
+import org.apache.tika.parser.ParserProxy;
+import org.apache.tika.parser.chm.accessor.DirectoryListingEntry;
+import org.apache.tika.parser.chm.core.ChmExtractor;
+import org.apache.tika.sax.BodyContentHandler;
+import org.apache.tika.sax.EmbeddedContentHandler;
+import org.apache.tika.sax.XHTMLContentHandler;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.SAXException;
+
+public class ChmParser extends AbstractParser {
+
+ /** Serial version UID */
+ private static final long serialVersionUID = 5938777307516469802L;
+
+ private static final Set<MediaType> SUPPORTED_TYPES =
+ Collections.unmodifiableSet(new HashSet<MediaType>(Arrays.asList(
+ MediaType.application("vnd.ms-htmlhelp"),
+ MediaType.application("chm"),
+ MediaType.application("x-chm"))));
+
+ private final Parser htmlProxy;
+
+ public ChmParser() {
+ this.htmlProxy = createParserProxy("org.apache.tika.parser.html.HtmlParser");
+ }
+ @Override
+ public Set<MediaType> getSupportedTypes(ParseContext context) {
+ return SUPPORTED_TYPES;
+ }
+
+ @Override
+ public void parse(InputStream stream, ContentHandler handler,
+ Metadata metadata, ParseContext context) throws IOException,
+ SAXException, TikaException {
+ ChmExtractor chmExtractor = new ChmExtractor(stream);
+
+ // metadata
+ metadata.set(Metadata.CONTENT_TYPE, "application/vnd.ms-htmlhelp");
+
+ // content
+ XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
+ xhtml.startDocument();
+
+ for (DirectoryListingEntry entry : chmExtractor.getChmDirList().getDirectoryListingEntryList()) {
+ final String entryName = entry.getName();
+ if (entryName.endsWith(".html")
+ || entryName.endsWith(".htm")
+ ) {
+// AttributesImpl attrs = new AttributesImpl();
+// attrs.addAttribute("", "name", "name", "String", entryName);
+// xhtml.startElement("", "document", "document", attrs);
+
+ byte[] data = chmExtractor.extractChmEntry(entry);
+
+ parsePage(data, xhtml);
+
+// xhtml.endElement("", "", "document");
+ }
+ }
+
+ xhtml.endDocument();
+ }
+
+
+ private void parsePage(byte[] byteObject, ContentHandler xhtml) throws TikaException {// throws IOException
+ InputStream stream = null;
+ Metadata metadata = new Metadata();
+ ContentHandler handler = new EmbeddedContentHandler(new BodyContentHandler(xhtml));// -1
+ ParseContext parser = new ParseContext();
+ try {
+ stream = new ByteArrayInputStream(byteObject);
+ htmlProxy.parse(stream, handler, metadata, parser);
+ } catch (SAXException e) {
+ throw new RuntimeException(e);
+ } catch (IOException e) {
+ // Pushback overflow from tagsoup
+ }
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/tika/blob/c7a6bcac/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/chm/accessor/ChmAccessor.java
----------------------------------------------------------------------
diff --git a/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/chm/accessor/ChmAccessor.java b/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/chm/accessor/ChmAccessor.java
index 42b0830..e8bf1cc 100644
--- a/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/chm/accessor/ChmAccessor.java
+++ b/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/chm/accessor/ChmAccessor.java
@@ -1,39 +1,39 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.parser.chm.accessor;
-
-import java.io.Serializable;
-
-import org.apache.tika.exception.TikaException;
-
-/**
- *
- * Defines an accessor interface
- *
- * @param <T>
- */
-public interface ChmAccessor<T> extends Serializable {
- /**
- * Parses chm accessor
- *
- * @param data
- * chm file
- * @param chmAccessor
- * @throws TikaException
- */
- void parse(byte[] data, T chmAccessor) throws TikaException;
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.chm.accessor;
+
+import java.io.Serializable;
+
+import org.apache.tika.exception.TikaException;
+
+/**
+ * Defines an accessor interface: a {@link Serializable} contract with a single
+ * {@code parse} method that takes raw bytes plus an object of type {@code T}.
+ *
+ * @param <T> type of the second argument accepted by {@code parse}
+ */
+public interface ChmAccessor<T> extends Serializable {
+ /**
+ * Parses a chm accessor out of the given chm file data.
+ *
+ * @param data chm file
+ * @param chmAccessor
+ * object of type {@code T}; presumably the accessor filled in from {@code data} (TODO confirm)
+ * @throws TikaException presumably when {@code data} cannot be parsed (TODO confirm)
+ */
+ void parse(byte[] data, T chmAccessor) throws TikaException;
+}