You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2016/06/29 11:11:38 UTC
[32/39] tika git commit: Convert new lines from windows to unix
http://git-wip-us.apache.org/repos/asf/tika/blob/c7a6bcac/tika-parser-modules/tika-parser-multimedia-module/src/main/java/org/apache/tika/parser/mp3/ID3v22Handler.java
----------------------------------------------------------------------
diff --git a/tika-parser-modules/tika-parser-multimedia-module/src/main/java/org/apache/tika/parser/mp3/ID3v22Handler.java b/tika-parser-modules/tika-parser-multimedia-module/src/main/java/org/apache/tika/parser/mp3/ID3v22Handler.java
index 98278e2..8d94c0b 100644
--- a/tika-parser-modules/tika-parser-multimedia-module/src/main/java/org/apache/tika/parser/mp3/ID3v22Handler.java
+++ b/tika-parser-modules/tika-parser-multimedia-module/src/main/java/org/apache/tika/parser/mp3/ID3v22Handler.java
@@ -1,159 +1,159 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.parser.mp3;
-
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.List;
-
-import org.apache.tika.exception.TikaException;
-import org.apache.tika.parser.mp3.ID3v2Frame.RawTag;
-import org.apache.tika.parser.mp3.ID3v2Frame.RawTagIterator;
-import org.xml.sax.SAXException;
-
-/**
- * This is used to parse ID3 Version 2.2 Tag information from an MP3 file,
- * if available.
- *
- * @see <a href="http://id3lib.sourceforge.net/id3/id3v2-00.txt">MP3 ID3 Version 2.2 specification</a>
- */
-public class ID3v22Handler implements ID3Tags {
- private String title;
- private String artist;
- private String album;
- private String year;
- private String composer;
- private String genre;
- private String trackNumber;
- private String albumArtist;
- private String disc;
- private List<ID3Comment> comments = new ArrayList<ID3Comment>();
-
- public ID3v22Handler(ID3v2Frame frame)
- throws IOException, SAXException, TikaException {
- RawTagIterator tags = new RawV22TagIterator(frame);
- while (tags.hasNext()) {
- RawTag tag = tags.next();
- if (tag.name.equals("TT2")) {
- title = getTagString(tag.data, 0, tag.data.length);
- } else if (tag.name.equals("TP1")) {
- artist = getTagString(tag.data, 0, tag.data.length);
- } else if (tag.name.equals("TP2")) {
- albumArtist = getTagString(tag.data, 0, tag.data.length);
- } else if (tag.name.equals("TAL")) {
- album = getTagString(tag.data, 0, tag.data.length);
- } else if (tag.name.equals("TYE")) {
- year = getTagString(tag.data, 0, tag.data.length);
- } else if (tag.name.equals("TCM")) {
- composer = getTagString(tag.data, 0, tag.data.length);
- } else if (tag.name.equals("COM")) {
- comments.add( getComment(tag.data, 0, tag.data.length) );
- } else if (tag.name.equals("TRK")) {
- trackNumber = getTagString(tag.data, 0, tag.data.length);
- } else if (tag.name.equals("TPA")) {
- disc = getTagString(tag.data, 0, tag.data.length);
- } else if (tag.name.equals("TCO")) {
- genre = extractGenre( getTagString(tag.data, 0, tag.data.length) );
- }
- }
- }
-
- private String getTagString(byte[] data, int offset, int length) {
- return ID3v2Frame.getTagString(data, offset, length);
- }
- private ID3Comment getComment(byte[] data, int offset, int length) {
- return ID3v2Frame.getComment(data, offset, length);
- }
-
- protected static String extractGenre(String rawGenre) {
- int open = rawGenre.indexOf("(");
- int close = rawGenre.indexOf(")");
- if (open == -1 && close == -1) {
- return rawGenre;
- } else if (open < close) {
- String genreStr = rawGenre.substring(0, open).trim();
- try {
- int genreID = Integer.parseInt(rawGenre.substring(open+1, close));
- return ID3Tags.GENRES[genreID];
- } catch(ArrayIndexOutOfBoundsException invalidNum) {
- return genreStr;
- } catch(NumberFormatException notANum) {
- return genreStr;
- }
- } else {
- return null;
- }
- }
-
- public boolean getTagsPresent() {
- return true;
- }
-
- public String getTitle() {
- return title;
- }
-
- public String getArtist() {
- return artist;
- }
-
- public String getAlbum() {
- return album;
- }
-
- public String getYear() {
- return year;
- }
-
- public String getComposer() {
- return composer;
- }
-
- public List<ID3Comment> getComments() {
- return comments;
- }
-
- public String getGenre() {
- return genre;
- }
-
- public String getTrackNumber() {
- return trackNumber;
- }
-
- public String getAlbumArtist() {
- return albumArtist;
- }
-
- public String getDisc() {
- return disc;
- }
-
- /**
- * ID3v22 doesn't have compilations,
- * so returns null;
- */
- public String getCompilation() {
- return null;
- }
-
- private class RawV22TagIterator extends RawTagIterator {
- private RawV22TagIterator(ID3v2Frame frame) {
- frame.super(3, 3, 1, 0);
- }
- }
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.mp3;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.parser.mp3.ID3v2Frame.RawTag;
+import org.apache.tika.parser.mp3.ID3v2Frame.RawTagIterator;
+import org.xml.sax.SAXException;
+
+/**
+ * This is used to parse ID3 Version 2.2 Tag information from an MP3 file,
+ * if available.
+ *
+ * @see <a href="http://id3lib.sourceforge.net/id3/id3v2-00.txt">MP3 ID3 Version 2.2 specification</a>
+ */
+public class ID3v22Handler implements ID3Tags {
+ private String title;
+ private String artist;
+ private String album;
+ private String year;
+ private String composer;
+ private String genre;
+ private String trackNumber;
+ private String albumArtist;
+ private String disc;
+ private List<ID3Comment> comments = new ArrayList<ID3Comment>();
+
+ public ID3v22Handler(ID3v2Frame frame)
+ throws IOException, SAXException, TikaException {
+ RawTagIterator tags = new RawV22TagIterator(frame);
+ while (tags.hasNext()) {
+ RawTag tag = tags.next();
+ if (tag.name.equals("TT2")) {
+ title = getTagString(tag.data, 0, tag.data.length);
+ } else if (tag.name.equals("TP1")) {
+ artist = getTagString(tag.data, 0, tag.data.length);
+ } else if (tag.name.equals("TP2")) {
+ albumArtist = getTagString(tag.data, 0, tag.data.length);
+ } else if (tag.name.equals("TAL")) {
+ album = getTagString(tag.data, 0, tag.data.length);
+ } else if (tag.name.equals("TYE")) {
+ year = getTagString(tag.data, 0, tag.data.length);
+ } else if (tag.name.equals("TCM")) {
+ composer = getTagString(tag.data, 0, tag.data.length);
+ } else if (tag.name.equals("COM")) {
+ comments.add( getComment(tag.data, 0, tag.data.length) );
+ } else if (tag.name.equals("TRK")) {
+ trackNumber = getTagString(tag.data, 0, tag.data.length);
+ } else if (tag.name.equals("TPA")) {
+ disc = getTagString(tag.data, 0, tag.data.length);
+ } else if (tag.name.equals("TCO")) {
+ genre = extractGenre( getTagString(tag.data, 0, tag.data.length) );
+ }
+ }
+ }
+
+ private String getTagString(byte[] data, int offset, int length) {
+ return ID3v2Frame.getTagString(data, offset, length);
+ }
+ private ID3Comment getComment(byte[] data, int offset, int length) {
+ return ID3v2Frame.getComment(data, offset, length);
+ }
+
+ protected static String extractGenre(String rawGenre) {
+ int open = rawGenre.indexOf("(");
+ int close = rawGenre.indexOf(")");
+ if (open == -1 && close == -1) {
+ return rawGenre;
+ } else if (open < close) {
+ String genreStr = rawGenre.substring(0, open).trim();
+ try {
+ int genreID = Integer.parseInt(rawGenre.substring(open+1, close));
+ return ID3Tags.GENRES[genreID];
+ } catch(ArrayIndexOutOfBoundsException invalidNum) {
+ return genreStr;
+ } catch(NumberFormatException notANum) {
+ return genreStr;
+ }
+ } else {
+ return null;
+ }
+ }
+
+ public boolean getTagsPresent() {
+ return true;
+ }
+
+ public String getTitle() {
+ return title;
+ }
+
+ public String getArtist() {
+ return artist;
+ }
+
+ public String getAlbum() {
+ return album;
+ }
+
+ public String getYear() {
+ return year;
+ }
+
+ public String getComposer() {
+ return composer;
+ }
+
+ public List<ID3Comment> getComments() {
+ return comments;
+ }
+
+ public String getGenre() {
+ return genre;
+ }
+
+ public String getTrackNumber() {
+ return trackNumber;
+ }
+
+ public String getAlbumArtist() {
+ return albumArtist;
+ }
+
+ public String getDisc() {
+ return disc;
+ }
+
+ /**
+ * ID3v22 doesn't have compilations,
+ * so returns null;
+ */
+ public String getCompilation() {
+ return null;
+ }
+
+ private class RawV22TagIterator extends RawTagIterator {
+ private RawV22TagIterator(ID3v2Frame frame) {
+ frame.super(3, 3, 1, 0);
+ }
+ }
+}
http://git-wip-us.apache.org/repos/asf/tika/blob/c7a6bcac/tika-parser-modules/tika-parser-multimedia-module/src/main/java/org/apache/tika/parser/mp3/ID3v23Handler.java
----------------------------------------------------------------------
diff --git a/tika-parser-modules/tika-parser-multimedia-module/src/main/java/org/apache/tika/parser/mp3/ID3v23Handler.java b/tika-parser-modules/tika-parser-multimedia-module/src/main/java/org/apache/tika/parser/mp3/ID3v23Handler.java
index 8c5386d..4b67eda 100644
--- a/tika-parser-modules/tika-parser-multimedia-module/src/main/java/org/apache/tika/parser/mp3/ID3v23Handler.java
+++ b/tika-parser-modules/tika-parser-multimedia-module/src/main/java/org/apache/tika/parser/mp3/ID3v23Handler.java
@@ -1,138 +1,138 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.parser.mp3;
-
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.List;
-
-import org.apache.tika.exception.TikaException;
-import org.apache.tika.parser.mp3.ID3v2Frame.RawTag;
-import org.apache.tika.parser.mp3.ID3v2Frame.RawTagIterator;
-import org.xml.sax.SAXException;
-
-/**
- * This is used to parse ID3 Version 2.3 Tag information from an MP3 file,
- * if available.
- *
- * @see <a href="http://id3lib.sourceforge.net/id3/id3v2.3.0.html">MP3 ID3 Version 2.3 specification</a>
- */
-public class ID3v23Handler implements ID3Tags {
- private String title;
- private String artist;
- private String album;
- private String year;
- private String composer;
- private String genre;
- private String trackNumber;
- private String albumArtist;
- private String disc;
- private String compilation;
- private List<ID3Comment> comments = new ArrayList<ID3Comment>();
-
- public ID3v23Handler(ID3v2Frame frame)
- throws IOException, SAXException, TikaException {
- RawTagIterator tags = new RawV23TagIterator(frame);
- while (tags.hasNext()) {
- RawTag tag = tags.next();
- if (tag.name.equals("TIT2")) {
- title = getTagString(tag.data, 0, tag.data.length);
- } else if (tag.name.equals("TPE1")) {
- artist = getTagString(tag.data, 0, tag.data.length);
- } else if (tag.name.equals("TPE2")) {
- albumArtist = getTagString(tag.data, 0, tag.data.length);
- } else if (tag.name.equals("TALB")) {
- album = getTagString(tag.data, 0, tag.data.length);
- } else if (tag.name.equals("TYER")) {
- year = getTagString(tag.data, 0, tag.data.length);
- } else if (tag.name.equals("TCOM")) {
- composer = getTagString(tag.data, 0, tag.data.length);
- } else if (tag.name.equals("COMM")) {
- comments.add( getComment(tag.data, 0, tag.data.length) );
- } else if (tag.name.equals("TRCK")) {
- trackNumber = getTagString(tag.data, 0, tag.data.length);
- } else if (tag.name.equals("TPOS")) {
- disc = getTagString(tag.data, 0, tag.data.length);
- } else if (tag.name.equals("TCMP")) {
- compilation = getTagString(tag.data, 0, tag.data.length);
- } else if (tag.name.equals("TCON")) {
- genre = ID3v22Handler.extractGenre( getTagString(tag.data, 0, tag.data.length) );
- }
- }
- }
-
- private String getTagString(byte[] data, int offset, int length) {
- return ID3v2Frame.getTagString(data, offset, length);
- }
- private ID3Comment getComment(byte[] data, int offset, int length) {
- return ID3v2Frame.getComment(data, offset, length);
- }
-
- public boolean getTagsPresent() {
- return true;
- }
-
- public String getTitle() {
- return title;
- }
-
- public String getArtist() {
- return artist;
- }
-
- public String getAlbum() {
- return album;
- }
-
- public String getYear() {
- return year;
- }
-
- public String getComposer() {
- return composer;
- }
-
- public List<ID3Comment> getComments() {
- return comments;
- }
-
- public String getGenre() {
- return genre;
- }
-
- public String getTrackNumber() {
- return trackNumber;
- }
-
- public String getAlbumArtist() {
- return albumArtist;
- }
-
- public String getDisc() {
- return disc;
- }
-
- public String getCompilation() {
- return compilation;
- }
-
- private class RawV23TagIterator extends RawTagIterator {
- private RawV23TagIterator(ID3v2Frame frame) {
- frame.super(4, 4, 1, 2);
- }
- }
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.mp3;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.parser.mp3.ID3v2Frame.RawTag;
+import org.apache.tika.parser.mp3.ID3v2Frame.RawTagIterator;
+import org.xml.sax.SAXException;
+
+/**
+ * This is used to parse ID3 Version 2.3 Tag information from an MP3 file,
+ * if available.
+ *
+ * @see <a href="http://id3lib.sourceforge.net/id3/id3v2.3.0.html">MP3 ID3 Version 2.3 specification</a>
+ */
+public class ID3v23Handler implements ID3Tags {
+ private String title;
+ private String artist;
+ private String album;
+ private String year;
+ private String composer;
+ private String genre;
+ private String trackNumber;
+ private String albumArtist;
+ private String disc;
+ private String compilation;
+ private List<ID3Comment> comments = new ArrayList<ID3Comment>();
+
+ public ID3v23Handler(ID3v2Frame frame)
+ throws IOException, SAXException, TikaException {
+ RawTagIterator tags = new RawV23TagIterator(frame);
+ while (tags.hasNext()) {
+ RawTag tag = tags.next();
+ if (tag.name.equals("TIT2")) {
+ title = getTagString(tag.data, 0, tag.data.length);
+ } else if (tag.name.equals("TPE1")) {
+ artist = getTagString(tag.data, 0, tag.data.length);
+ } else if (tag.name.equals("TPE2")) {
+ albumArtist = getTagString(tag.data, 0, tag.data.length);
+ } else if (tag.name.equals("TALB")) {
+ album = getTagString(tag.data, 0, tag.data.length);
+ } else if (tag.name.equals("TYER")) {
+ year = getTagString(tag.data, 0, tag.data.length);
+ } else if (tag.name.equals("TCOM")) {
+ composer = getTagString(tag.data, 0, tag.data.length);
+ } else if (tag.name.equals("COMM")) {
+ comments.add( getComment(tag.data, 0, tag.data.length) );
+ } else if (tag.name.equals("TRCK")) {
+ trackNumber = getTagString(tag.data, 0, tag.data.length);
+ } else if (tag.name.equals("TPOS")) {
+ disc = getTagString(tag.data, 0, tag.data.length);
+ } else if (tag.name.equals("TCMP")) {
+ compilation = getTagString(tag.data, 0, tag.data.length);
+ } else if (tag.name.equals("TCON")) {
+ genre = ID3v22Handler.extractGenre( getTagString(tag.data, 0, tag.data.length) );
+ }
+ }
+ }
+
+ private String getTagString(byte[] data, int offset, int length) {
+ return ID3v2Frame.getTagString(data, offset, length);
+ }
+ private ID3Comment getComment(byte[] data, int offset, int length) {
+ return ID3v2Frame.getComment(data, offset, length);
+ }
+
+ public boolean getTagsPresent() {
+ return true;
+ }
+
+ public String getTitle() {
+ return title;
+ }
+
+ public String getArtist() {
+ return artist;
+ }
+
+ public String getAlbum() {
+ return album;
+ }
+
+ public String getYear() {
+ return year;
+ }
+
+ public String getComposer() {
+ return composer;
+ }
+
+ public List<ID3Comment> getComments() {
+ return comments;
+ }
+
+ public String getGenre() {
+ return genre;
+ }
+
+ public String getTrackNumber() {
+ return trackNumber;
+ }
+
+ public String getAlbumArtist() {
+ return albumArtist;
+ }
+
+ public String getDisc() {
+ return disc;
+ }
+
+ public String getCompilation() {
+ return compilation;
+ }
+
+ private class RawV23TagIterator extends RawTagIterator {
+ private RawV23TagIterator(ID3v2Frame frame) {
+ frame.super(4, 4, 1, 2);
+ }
+ }
+}
http://git-wip-us.apache.org/repos/asf/tika/blob/c7a6bcac/tika-parser-modules/tika-parser-multimedia-module/src/main/java/org/apache/tika/parser/mp3/ID3v24Handler.java
----------------------------------------------------------------------
diff --git a/tika-parser-modules/tika-parser-multimedia-module/src/main/java/org/apache/tika/parser/mp3/ID3v24Handler.java b/tika-parser-modules/tika-parser-multimedia-module/src/main/java/org/apache/tika/parser/mp3/ID3v24Handler.java
index 5c16937..caba928 100644
--- a/tika-parser-modules/tika-parser-multimedia-module/src/main/java/org/apache/tika/parser/mp3/ID3v24Handler.java
+++ b/tika-parser-modules/tika-parser-multimedia-module/src/main/java/org/apache/tika/parser/mp3/ID3v24Handler.java
@@ -1,143 +1,143 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.parser.mp3;
-
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.List;
-
-import org.apache.tika.exception.TikaException;
-import org.apache.tika.parser.mp3.ID3v2Frame.RawTag;
-import org.apache.tika.parser.mp3.ID3v2Frame.RawTagIterator;
-import org.xml.sax.SAXException;
-
-/**
- * This is used to parse ID3 Version 2.4 Tag information from an MP3 file,
- * if available.
- *
- * @see <a href="http://www.id3.org/id3v2.4.0-structure">MP3 ID3 Version 2.4 specification</a>
- * @see <a href="http://www.id3.org/id3v2.4.0-frames">MP3 ID3 Version 2.4 frames/tags</a>
- */
-public class ID3v24Handler implements ID3Tags {
- private String title;
- private String artist;
- private String album;
- private String year;
- private String composer;
- private String genre;
- private String trackNumber;
- private String albumArtist;
- private String disc;
- private String compilation;
- private List<ID3Comment> comments = new ArrayList<ID3Comment>();
-
- public ID3v24Handler(ID3v2Frame frame)
- throws IOException, SAXException, TikaException {
- RawTagIterator tags = new RawV24TagIterator(frame);
- while (tags.hasNext()) {
- RawTag tag = tags.next();
- if (tag.name.equals("TIT2")) {
- title = getTagString(tag.data, 0, tag.data.length);
- } else if (tag.name.equals("TPE1")) {
- artist = getTagString(tag.data, 0, tag.data.length);
- } else if (tag.name.equals("TPE2")) {
- albumArtist = getTagString(tag.data, 0, tag.data.length);
- } else if (tag.name.equals("TALB")) {
- album = getTagString(tag.data, 0, tag.data.length);
- } else if (tag.name.equals("TYER")) {
- year = getTagString(tag.data, 0, tag.data.length);
- } else if (tag.name.equals("TDRC")) {
- if(year == null) {
- year = getTagString(tag.data, 0, tag.data.length);
- }
- } else if (tag.name.equals("TCOM")) {
- composer = getTagString(tag.data, 0, tag.data.length);
- } else if (tag.name.equals("COMM")) {
- comments.add( getComment(tag.data, 0, tag.data.length) );
- } else if (tag.name.equals("TRCK")) {
- trackNumber = getTagString(tag.data, 0, tag.data.length);
- } else if (tag.name.equals("TPOS")) {
- disc = getTagString(tag.data, 0, tag.data.length);
- } else if (tag.name.equals("TCMP")) {
- compilation = getTagString(tag.data, 0, tag.data.length);
- } else if (tag.name.equals("TCON")) {
- genre = ID3v22Handler.extractGenre( getTagString(tag.data, 0, tag.data.length) );
- }
- }
- }
-
- private String getTagString(byte[] data, int offset, int length) {
- return ID3v2Frame.getTagString(data, offset, length);
- }
- private ID3Comment getComment(byte[] data, int offset, int length) {
- return ID3v2Frame.getComment(data, offset, length);
- }
-
- public boolean getTagsPresent() {
- return true;
- }
-
- public String getTitle() {
- return title;
- }
-
- public String getArtist() {
- return artist;
- }
-
- public String getAlbum() {
- return album;
- }
-
- public String getYear() {
- return year;
- }
-
- public String getComposer() {
- return composer;
- }
-
- public List<ID3Comment> getComments() {
- return comments;
- }
-
- public String getGenre() {
- return genre;
- }
-
- public String getTrackNumber() {
- return trackNumber;
- }
-
- public String getAlbumArtist() {
- return albumArtist;
- }
-
- public String getDisc() {
- return disc;
- }
-
- public String getCompilation() {
- return compilation;
- }
-
- private class RawV24TagIterator extends RawTagIterator {
- private RawV24TagIterator(ID3v2Frame frame) {
- frame.super(4, 4, 1, 2);
- }
- }
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.mp3;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.parser.mp3.ID3v2Frame.RawTag;
+import org.apache.tika.parser.mp3.ID3v2Frame.RawTagIterator;
+import org.xml.sax.SAXException;
+
+/**
+ * This is used to parse ID3 Version 2.4 Tag information from an MP3 file,
+ * if available.
+ *
+ * @see <a href="http://www.id3.org/id3v2.4.0-structure">MP3 ID3 Version 2.4 specification</a>
+ * @see <a href="http://www.id3.org/id3v2.4.0-frames">MP3 ID3 Version 2.4 frames/tags</a>
+ */
+public class ID3v24Handler implements ID3Tags {
+ private String title;
+ private String artist;
+ private String album;
+ private String year;
+ private String composer;
+ private String genre;
+ private String trackNumber;
+ private String albumArtist;
+ private String disc;
+ private String compilation;
+ private List<ID3Comment> comments = new ArrayList<ID3Comment>();
+
+ public ID3v24Handler(ID3v2Frame frame)
+ throws IOException, SAXException, TikaException {
+ RawTagIterator tags = new RawV24TagIterator(frame);
+ while (tags.hasNext()) {
+ RawTag tag = tags.next();
+ if (tag.name.equals("TIT2")) {
+ title = getTagString(tag.data, 0, tag.data.length);
+ } else if (tag.name.equals("TPE1")) {
+ artist = getTagString(tag.data, 0, tag.data.length);
+ } else if (tag.name.equals("TPE2")) {
+ albumArtist = getTagString(tag.data, 0, tag.data.length);
+ } else if (tag.name.equals("TALB")) {
+ album = getTagString(tag.data, 0, tag.data.length);
+ } else if (tag.name.equals("TYER")) {
+ year = getTagString(tag.data, 0, tag.data.length);
+ } else if (tag.name.equals("TDRC")) {
+ if(year == null) {
+ year = getTagString(tag.data, 0, tag.data.length);
+ }
+ } else if (tag.name.equals("TCOM")) {
+ composer = getTagString(tag.data, 0, tag.data.length);
+ } else if (tag.name.equals("COMM")) {
+ comments.add( getComment(tag.data, 0, tag.data.length) );
+ } else if (tag.name.equals("TRCK")) {
+ trackNumber = getTagString(tag.data, 0, tag.data.length);
+ } else if (tag.name.equals("TPOS")) {
+ disc = getTagString(tag.data, 0, tag.data.length);
+ } else if (tag.name.equals("TCMP")) {
+ compilation = getTagString(tag.data, 0, tag.data.length);
+ } else if (tag.name.equals("TCON")) {
+ genre = ID3v22Handler.extractGenre( getTagString(tag.data, 0, tag.data.length) );
+ }
+ }
+ }
+
+ private String getTagString(byte[] data, int offset, int length) {
+ return ID3v2Frame.getTagString(data, offset, length);
+ }
+ private ID3Comment getComment(byte[] data, int offset, int length) {
+ return ID3v2Frame.getComment(data, offset, length);
+ }
+
+ public boolean getTagsPresent() {
+ return true;
+ }
+
+ public String getTitle() {
+ return title;
+ }
+
+ public String getArtist() {
+ return artist;
+ }
+
+ public String getAlbum() {
+ return album;
+ }
+
+ public String getYear() {
+ return year;
+ }
+
+ public String getComposer() {
+ return composer;
+ }
+
+ public List<ID3Comment> getComments() {
+ return comments;
+ }
+
+ public String getGenre() {
+ return genre;
+ }
+
+ public String getTrackNumber() {
+ return trackNumber;
+ }
+
+ public String getAlbumArtist() {
+ return albumArtist;
+ }
+
+ public String getDisc() {
+ return disc;
+ }
+
+ public String getCompilation() {
+ return compilation;
+ }
+
+ private class RawV24TagIterator extends RawTagIterator {
+ private RawV24TagIterator(ID3v2Frame frame) {
+ frame.super(4, 4, 1, 2);
+ }
+ }
+}
http://git-wip-us.apache.org/repos/asf/tika/blob/c7a6bcac/tika-parser-modules/tika-parser-multimedia-module/src/main/java/org/apache/tika/parser/mp3/ID3v2Frame.java
----------------------------------------------------------------------
diff --git a/tika-parser-modules/tika-parser-multimedia-module/src/main/java/org/apache/tika/parser/mp3/ID3v2Frame.java b/tika-parser-modules/tika-parser-multimedia-module/src/main/java/org/apache/tika/parser/mp3/ID3v2Frame.java
index 458c5e2..41298dd 100644
--- a/tika-parser-modules/tika-parser-multimedia-module/src/main/java/org/apache/tika/parser/mp3/ID3v2Frame.java
+++ b/tika-parser-modules/tika-parser-multimedia-module/src/main/java/org/apache/tika/parser/mp3/ID3v2Frame.java
@@ -1,424 +1,424 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.parser.mp3;
-
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.PushbackInputStream;
-import java.io.UnsupportedEncodingException;
-import java.util.Iterator;
-
-import org.apache.tika.parser.mp3.ID3Tags.ID3Comment;
-
-import static java.nio.charset.StandardCharsets.ISO_8859_1;
-
-/**
- * A frame of ID3v2 data, which is then passed to a handler to
- * be turned into useful data.
- */
-public class ID3v2Frame implements MP3Frame {
- private int majorVersion;
- private int minorVersion;
- private int flags;
- private int length;
- /** Excludes the header size part */
- private byte[] extendedHeader;
- private byte[] data;
-
- public int getMajorVersion() {
- return majorVersion;
- }
-
- public int getMinorVersion() {
- return minorVersion;
- }
-
- public int getFlags() {
- return flags;
- }
-
- public int getLength() {
- return length;
- }
-
- public byte[] getExtendedHeader() {
- return extendedHeader;
- }
-
- public byte[] getData() {
- return data;
- }
-
- /**
- * Returns the next ID3v2 Frame in
- * the file, or null if the next batch of data
- * doesn't correspond to either an ID3v2 header.
- * If no ID3v2 frame could be detected and the passed in input stream is a
- * {@code PushbackInputStream}, the bytes read so far are pushed back so
- * that they can be read again.
- * ID3v2 Frames should come before all Audio ones.
- */
- public static MP3Frame createFrameIfPresent(InputStream inp)
- throws IOException {
- int h1 = inp.read();
- int h2 = inp.read();
- int h3 = inp.read();
-
- // Is it an ID3v2 Frame?
- if (h1 == (int)'I' && h2 == (int)'D' && h3 == (int)'3') {
- int majorVersion = inp.read();
- int minorVersion = inp.read();
- if (majorVersion == -1 || minorVersion == -1) {
- pushBack(inp, h1, h2, h3, majorVersion, minorVersion);
- return null;
- }
- return new ID3v2Frame(majorVersion, minorVersion, inp);
- }
-
- // Not a frame header
- pushBack(inp, h1, h2, h3);
- return null;
- }
-
- /**
- * Pushes bytes back into the stream if possible. This method is called if
- * no ID3v2 header could be found at the current stream position.
- *
- * @param inp the input stream
- * @param bytes the bytes to be pushed back
- * @throws IOException if an error occurs
- */
- private static void pushBack(InputStream inp, int... bytes)
- throws IOException
- {
- if (inp instanceof PushbackInputStream)
- {
- byte[] buf = new byte[bytes.length];
- for (int i = 0; i < bytes.length; i++)
- {
- buf[i] = (byte) bytes[i];
- }
- ((PushbackInputStream) inp).unread(buf);
- }
- }
-
- private ID3v2Frame(int majorVersion, int minorVersion, InputStream inp)
- throws IOException {
- this.majorVersion = majorVersion;
- this.minorVersion = minorVersion;
-
- // Get the flags and the length
- flags = inp.read();
- length = get7BitsInt(readFully(inp, 4), 0);
-
- // Do we have an extended header?
- if ((flags & 0x02) == 0x02) {
- int size = getInt(readFully(inp, 4));
- extendedHeader = readFully(inp, size);
- }
-
- // Get the frame's data, or at least as much
- // of it as we could do
- data = readFully(inp, length, false);
- }
-
- protected static int getInt(byte[] data) {
- return getInt(data, 0);
- }
-
- protected static int getInt(byte[] data, int offset) {
- int b0 = data[offset+0] & 0xFF;
- int b1 = data[offset+1] & 0xFF;
- int b2 = data[offset+2] & 0xFF;
- int b3 = data[offset+3] & 0xFF;
- return (b0 << 24) + (b1 << 16) + (b2 << 8) + (b3 << 0);
- }
-
- protected static int getInt3(byte[] data, int offset) {
- int b0 = data[offset+0] & 0xFF;
- int b1 = data[offset+1] & 0xFF;
- int b2 = data[offset+2] & 0xFF;
- return (b0 << 16) + (b1 << 8) + (b2 << 0);
- }
-
- protected static int getInt2(byte[] data, int offset) {
- int b0 = data[offset+0] & 0xFF;
- int b1 = data[offset+1] & 0xFF;
- return (b0 << 8) + (b1 << 0);
- }
-
- /**
- * AKA a Synchsafe integer.
- * 4 bytes hold a 28 bit number. The highest
- * bit in each byte is always 0 and always ignored.
- */
- protected static int get7BitsInt(byte[] data, int offset) {
- int b0 = data[offset+0] & 0x7F;
- int b1 = data[offset+1] & 0x7F;
- int b2 = data[offset+2] & 0x7F;
- int b3 = data[offset+3] & 0x7F;
- return (b0 << 21) + (b1 << 14) + (b2 << 7) + (b3 << 0);
- }
-
- protected static byte[] readFully(InputStream inp, int length)
- throws IOException {
- return readFully(inp, length, true);
- }
- protected static byte[] readFully(InputStream inp, int length, boolean shortDataIsFatal)
- throws IOException {
- byte[] b = new byte[length];
-
- int pos = 0;
- int read;
- while (pos < length) {
- read = inp.read(b, pos, length-pos);
- if (read == -1) {
- if(shortDataIsFatal) {
- throw new IOException("Tried to read " + length + " bytes, but only " + pos + " bytes present");
- } else {
- // Give them what we found
- // TODO Log the short read
- return b;
- }
- }
- pos += read;
- }
-
- return b;
- }
-
- protected static class TextEncoding {
- public final boolean doubleByte;
- public final String encoding;
- private TextEncoding(String encoding, boolean doubleByte) {
- this.doubleByte = doubleByte;
- this.encoding = encoding;
- }
- }
- protected static final TextEncoding[] encodings = new TextEncoding[] {
- new TextEncoding("ISO-8859-1", false),
- new TextEncoding("UTF-16", true), // With BOM
- new TextEncoding("UTF-16BE", true), // Without BOM
- new TextEncoding("UTF-8", false)
- };
-
- /**
- * Returns the (possibly null padded) String at the given offset and
- * length. String encoding is held in the first byte;
- */
- protected static String getTagString(byte[] data, int offset, int length) {
- int actualLength = length;
- if (actualLength == 0) {
- return "";
- }
- if (actualLength == 1 && data[offset] == 0) {
- return "";
- }
-
- // Does it have an encoding flag?
- // Detect by the first byte being sub 0x20
- TextEncoding encoding = encodings[0];
- byte maybeEncodingFlag = data[offset];
- if (maybeEncodingFlag >= 0 && maybeEncodingFlag < encodings.length) {
- offset++;
- actualLength--;
- encoding = encodings[maybeEncodingFlag];
- }
-
- // Trim off null termination / padding (as present)
- while (encoding.doubleByte && actualLength >= 2 && data[offset+actualLength-1] == 0 && data[offset+actualLength-2] == 0) {
- actualLength -= 2;
- }
- while (!encoding.doubleByte && actualLength >= 1 && data[offset+actualLength-1] == 0) {
- actualLength--;
- }
- if (actualLength == 0) {
- return "";
- }
-
- // TIKA-1024: If it's UTF-16 (with BOM) and all we
- // have is a naked BOM then short-circuit here
- // (return empty string), because new String(..)
- // gives different results on different JVMs
- if (encoding.encoding.equals("UTF-16") && actualLength == 2 &&
- ((data[offset] == (byte) 0xff && data[offset+1] == (byte) 0xfe) ||
- (data[offset] == (byte) 0xfe && data[offset+1] == (byte) 0xff))) {
- return "";
- }
-
- try {
- // Build the base string
- return new String(data, offset, actualLength, encoding.encoding);
- } catch (UnsupportedEncodingException e) {
- throw new RuntimeException(
- "Core encoding " + encoding.encoding + " is not available", e);
- }
- }
- /**
- * Builds up the ID3 comment, by parsing and extracting
- * the comment string parts from the given data.
- */
- protected static ID3Comment getComment(byte[] data, int offset, int length) {
- // Comments must have an encoding
- int encodingFlag = data[offset];
- if (encodingFlag >= 0 && encodingFlag < encodings.length) {
- // Good, valid flag
- } else {
- // Invalid string
- return null;
- }
-
- TextEncoding encoding = encodings[encodingFlag];
-
- // First is a 3 byte language
- String lang = getString(data, offset+1, 3);
-
- // After that we have [Desc]\0(\0)[Text]
- int descStart = offset+4;
- int textStart = -1;
- String description = null;
- String text = null;
-
- // Find where the description ends
- try {
- for (int i=descStart; i<offset+length; i++) {
- if (encoding.doubleByte && data[i]==0 && data[i+1] == 0) {
- // Handle LE vs BE on low byte text
- if (i+2 < offset+length && data[i+1] == 0 && data[i+2] == 0) {
- i++;
- }
- textStart = i+2;
- description = new String(data, descStart, i-descStart, encoding.encoding);
- break;
- }
- if (!encoding.doubleByte && data[i]==0) {
- textStart = i+1;
- description = new String(data, descStart, i-descStart, encoding.encoding);
- break;
- }
- }
-
- // Did we find the end?
- if (textStart > -1) {
- text = new String(data, textStart, offset+length-textStart, encoding.encoding);
- } else {
- // Assume everything is the text
- text = new String(data, descStart, offset+length-descStart, encoding.encoding);
- }
-
- // Return
- return new ID3Comment(lang, description, text);
- } catch (UnsupportedEncodingException e) {
- throw new RuntimeException(
- "Core encoding " + encoding.encoding + " is not available", e);
- }
- }
-
- /**
- * Returns the String at the given
- * offset and length. Strings are ISO-8859-1
- */
- protected static String getString(byte[] data, int offset, int length) {
- return new String(data, offset, length, ISO_8859_1);
- }
-
-
- /**
- * Iterates over id3v2 raw tags.
- * Create an instance of this that configures the
- * various length and multipliers.
- */
- protected class RawTagIterator implements Iterator<RawTag> {
- private int nameLength;
- private int sizeLength;
- private int sizeMultiplier;
- private int flagLength;
-
- private int offset = 0;
-
- protected RawTagIterator(
- int nameLength, int sizeLength, int sizeMultiplier,
- int flagLength) {
- this.nameLength = nameLength;
- this.sizeLength = sizeLength;
- this.sizeMultiplier = sizeMultiplier;
- this.flagLength = flagLength;
- }
-
- public boolean hasNext() {
- // Check for padding at the end
- return offset < data.length && data[offset] != 0;
- }
-
- public RawTag next() {
- RawTag tag = new RawTag(nameLength, sizeLength, sizeMultiplier,
- flagLength, data, offset);
- offset += tag.getSize();
- return tag;
- }
-
- public void remove() {
- }
-
- }
-
- protected static class RawTag {
- private int headerSize;
- protected String name;
- protected int flag;
- protected byte[] data;
-
- private RawTag(
- int nameLength, int sizeLength, int sizeMultiplier,
- int flagLength, byte[] frameData, int offset) {
- headerSize = nameLength + sizeLength + flagLength;
-
- // Name, normally 3 or 4 bytes
- name = getString(frameData, offset, nameLength);
-
- // Size
- int rawSize;
- if (sizeLength == 3) {
- rawSize = getInt3(frameData, offset+nameLength);
- } else {
- rawSize = getInt(frameData, offset+nameLength);
- }
- int size = rawSize * sizeMultiplier;
-
- // Flag
- if (flagLength > 0) {
- if (flagLength == 1) {
- flag = (int)frameData[offset+nameLength+sizeLength];
- } else {
- flag = getInt2(frameData, offset+nameLength+sizeLength);
- }
- }
-
- // Now data
- int copyFrom = offset+nameLength+sizeLength+flagLength;
- size = Math.max(0, Math.min(size, frameData.length-copyFrom)); // TIKA-1218, prevent negative size for malformed files.
- data = new byte[size];
- System.arraycopy(frameData, copyFrom, data, 0, size);
- }
-
- protected int getSize() {
- return headerSize + data.length;
- }
-
- }
-
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.mp3;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.PushbackInputStream;
+import java.io.UnsupportedEncodingException;
+import java.util.Iterator;
+
+import org.apache.tika.parser.mp3.ID3Tags.ID3Comment;
+
+import static java.nio.charset.StandardCharsets.ISO_8859_1;
+
+/**
+ * A frame of ID3v2 data, which is then passed to a handler to
+ * be turned into useful data.
+ */
+public class ID3v2Frame implements MP3Frame {
+ private int majorVersion;
+ private int minorVersion;
+ private int flags;
+ private int length;
+ /** Excludes the header size part */
+ private byte[] extendedHeader;
+ private byte[] data;
+
+ public int getMajorVersion() {
+ return majorVersion;
+ }
+
+ public int getMinorVersion() {
+ return minorVersion;
+ }
+
+ public int getFlags() {
+ return flags;
+ }
+
+ public int getLength() {
+ return length;
+ }
+
+ public byte[] getExtendedHeader() {
+ return extendedHeader;
+ }
+
+ public byte[] getData() {
+ return data;
+ }
+
+ /**
+ * Returns the next ID3v2 Frame in
+ * the file, or null if the next batch of data
+ * doesn't correspond to an ID3v2 header.
+ * If no ID3v2 frame could be detected and the passed in input stream is a
+ * {@code PushbackInputStream}, the bytes read so far are pushed back so
+ * that they can be read again.
+ * ID3v2 Frames should come before all Audio ones.
+ */
+ public static MP3Frame createFrameIfPresent(InputStream inp)
+ throws IOException {
+ int h1 = inp.read();
+ int h2 = inp.read();
+ int h3 = inp.read();
+
+ // Is it an ID3v2 Frame?
+ if (h1 == (int)'I' && h2 == (int)'D' && h3 == (int)'3') {
+ int majorVersion = inp.read();
+ int minorVersion = inp.read();
+ if (majorVersion == -1 || minorVersion == -1) {
+ pushBack(inp, h1, h2, h3, majorVersion, minorVersion);
+ return null;
+ }
+ return new ID3v2Frame(majorVersion, minorVersion, inp);
+ }
+
+ // Not a frame header
+ pushBack(inp, h1, h2, h3);
+ return null;
+ }
+
+ /**
+ * Pushes bytes back into the stream if possible. This method is called if
+ * no ID3v2 header could be found at the current stream position.
+ *
+ * @param inp the input stream
+ * @param bytes the bytes to be pushed back
+ * @throws IOException if an error occurs
+ */
+ private static void pushBack(InputStream inp, int... bytes)
+ throws IOException
+ {
+ if (inp instanceof PushbackInputStream)
+ {
+ byte[] buf = new byte[bytes.length];
+ for (int i = 0; i < bytes.length; i++)
+ {
+ buf[i] = (byte) bytes[i];
+ }
+ ((PushbackInputStream) inp).unread(buf);
+ }
+ }
+
+ private ID3v2Frame(int majorVersion, int minorVersion, InputStream inp)
+ throws IOException {
+ this.majorVersion = majorVersion;
+ this.minorVersion = minorVersion;
+
+ // Get the flags and the length
+ flags = inp.read();
+ length = get7BitsInt(readFully(inp, 4), 0);
+
+ // Do we have an extended header?
+ if ((flags & 0x02) == 0x02) {
+ int size = getInt(readFully(inp, 4));
+ extendedHeader = readFully(inp, size);
+ }
+
+ // Get the frame's data, or at least as much
+ // of it as the stream actually held
+ data = readFully(inp, length, false);
+ }
+
+ protected static int getInt(byte[] data) {
+ return getInt(data, 0);
+ }
+
+ protected static int getInt(byte[] data, int offset) {
+ int b0 = data[offset+0] & 0xFF;
+ int b1 = data[offset+1] & 0xFF;
+ int b2 = data[offset+2] & 0xFF;
+ int b3 = data[offset+3] & 0xFF;
+ return (b0 << 24) + (b1 << 16) + (b2 << 8) + (b3 << 0);
+ }
+
+ protected static int getInt3(byte[] data, int offset) {
+ int b0 = data[offset+0] & 0xFF;
+ int b1 = data[offset+1] & 0xFF;
+ int b2 = data[offset+2] & 0xFF;
+ return (b0 << 16) + (b1 << 8) + (b2 << 0);
+ }
+
+ protected static int getInt2(byte[] data, int offset) {
+ int b0 = data[offset+0] & 0xFF;
+ int b1 = data[offset+1] & 0xFF;
+ return (b0 << 8) + (b1 << 0);
+ }
+
+ /**
+ * AKA a Synchsafe integer.
+ * 4 bytes hold a 28 bit number. The highest
+ * bit in each byte is always 0 and always ignored.
+ */
+ protected static int get7BitsInt(byte[] data, int offset) {
+ int b0 = data[offset+0] & 0x7F;
+ int b1 = data[offset+1] & 0x7F;
+ int b2 = data[offset+2] & 0x7F;
+ int b3 = data[offset+3] & 0x7F;
+ return (b0 << 21) + (b1 << 14) + (b2 << 7) + (b3 << 0);
+ }
+
+ protected static byte[] readFully(InputStream inp, int length)
+ throws IOException {
+ return readFully(inp, length, true);
+ }
+ protected static byte[] readFully(InputStream inp, int length, boolean shortDataIsFatal)
+ throws IOException {
+ byte[] b = new byte[length];
+
+ int pos = 0;
+ int read;
+ while (pos < length) {
+ read = inp.read(b, pos, length-pos);
+ if (read == -1) {
+ if(shortDataIsFatal) {
+ throw new IOException("Tried to read " + length + " bytes, but only " + pos + " bytes present");
+ } else {
+ // Give them what we found
+ // TODO Log the short read
+ return b;
+ }
+ }
+ pos += read;
+ }
+
+ return b;
+ }
+
+ protected static class TextEncoding {
+ public final boolean doubleByte;
+ public final String encoding;
+ private TextEncoding(String encoding, boolean doubleByte) {
+ this.doubleByte = doubleByte;
+ this.encoding = encoding;
+ }
+ }
+ protected static final TextEncoding[] encodings = new TextEncoding[] {
+ new TextEncoding("ISO-8859-1", false),
+ new TextEncoding("UTF-16", true), // With BOM
+ new TextEncoding("UTF-16BE", true), // Without BOM
+ new TextEncoding("UTF-8", false)
+ };
+
+ /**
+ * Returns the (possibly null padded) String at the given offset and
+ * length. The string encoding, when flagged, is held in the first byte.
+ */
+ protected static String getTagString(byte[] data, int offset, int length) {
+ int actualLength = length;
+ if (actualLength == 0) {
+ return "";
+ }
+ if (actualLength == 1 && data[offset] == 0) {
+ return "";
+ }
+
+ // Does it have an encoding flag?
+ // Detect by the first byte being a valid index (0-3) into the encodings table
+ TextEncoding encoding = encodings[0];
+ byte maybeEncodingFlag = data[offset];
+ if (maybeEncodingFlag >= 0 && maybeEncodingFlag < encodings.length) {
+ offset++;
+ actualLength--;
+ encoding = encodings[maybeEncodingFlag];
+ }
+
+ // Trim off null termination / padding (as present)
+ while (encoding.doubleByte && actualLength >= 2 && data[offset+actualLength-1] == 0 && data[offset+actualLength-2] == 0) {
+ actualLength -= 2;
+ }
+ while (!encoding.doubleByte && actualLength >= 1 && data[offset+actualLength-1] == 0) {
+ actualLength--;
+ }
+ if (actualLength == 0) {
+ return "";
+ }
+
+ // TIKA-1024: If it's UTF-16 (with BOM) and all we
+ // have is a naked BOM then short-circuit here
+ // (return empty string), because new String(..)
+ // gives different results on different JVMs
+ if (encoding.encoding.equals("UTF-16") && actualLength == 2 &&
+ ((data[offset] == (byte) 0xff && data[offset+1] == (byte) 0xfe) ||
+ (data[offset] == (byte) 0xfe && data[offset+1] == (byte) 0xff))) {
+ return "";
+ }
+
+ try {
+ // Build the base string
+ return new String(data, offset, actualLength, encoding.encoding);
+ } catch (UnsupportedEncodingException e) {
+ throw new RuntimeException(
+ "Core encoding " + encoding.encoding + " is not available", e);
+ }
+ }
+ /**
+ * Builds up the ID3 comment, by parsing and extracting
+ * the comment string parts from the given data.
+ */
+ protected static ID3Comment getComment(byte[] data, int offset, int length) {
+ // Comments must have an encoding
+ int encodingFlag = data[offset];
+ if (encodingFlag >= 0 && encodingFlag < encodings.length) {
+ // Good, valid flag
+ } else {
+ // Invalid string
+ return null;
+ }
+
+ TextEncoding encoding = encodings[encodingFlag];
+
+ // First is a 3 byte language
+ String lang = getString(data, offset+1, 3);
+
+ // After that we have [Desc]\0(\0)[Text]
+ int descStart = offset+4;
+ int textStart = -1;
+ String description = null;
+ String text = null;
+
+ // Find where the description ends
+ try {
+ for (int i=descStart; i<offset+length; i++) {
+ if (encoding.doubleByte && data[i]==0 && data[i+1] == 0) {
+ // Handle LE vs BE on low byte text
+ if (i+2 < offset+length && data[i+1] == 0 && data[i+2] == 0) {
+ i++;
+ }
+ textStart = i+2;
+ description = new String(data, descStart, i-descStart, encoding.encoding);
+ break;
+ }
+ if (!encoding.doubleByte && data[i]==0) {
+ textStart = i+1;
+ description = new String(data, descStart, i-descStart, encoding.encoding);
+ break;
+ }
+ }
+
+ // Did we find the end?
+ if (textStart > -1) {
+ text = new String(data, textStart, offset+length-textStart, encoding.encoding);
+ } else {
+ // Assume everything is the text
+ text = new String(data, descStart, offset+length-descStart, encoding.encoding);
+ }
+
+ // Return
+ return new ID3Comment(lang, description, text);
+ } catch (UnsupportedEncodingException e) {
+ throw new RuntimeException(
+ "Core encoding " + encoding.encoding + " is not available", e);
+ }
+ }
+
+ /**
+ * Returns the String at the given
+ * offset and length. Strings are ISO-8859-1
+ */
+ protected static String getString(byte[] data, int offset, int length) {
+ return new String(data, offset, length, ISO_8859_1);
+ }
+
+
+ /**
+ * Iterates over id3v2 raw tags.
+ * Create an instance of this that configures the
+ * name, size and flag lengths and the size multiplier.
+ */
+ protected class RawTagIterator implements Iterator<RawTag> {
+ private int nameLength;
+ private int sizeLength;
+ private int sizeMultiplier;
+ private int flagLength;
+
+ private int offset = 0;
+
+ protected RawTagIterator(
+ int nameLength, int sizeLength, int sizeMultiplier,
+ int flagLength) {
+ this.nameLength = nameLength;
+ this.sizeLength = sizeLength;
+ this.sizeMultiplier = sizeMultiplier;
+ this.flagLength = flagLength;
+ }
+
+ public boolean hasNext() {
+ // Check for padding at the end
+ return offset < data.length && data[offset] != 0;
+ }
+
+ public RawTag next() {
+ RawTag tag = new RawTag(nameLength, sizeLength, sizeMultiplier,
+ flagLength, data, offset);
+ offset += tag.getSize();
+ return tag;
+ }
+
+ public void remove() {
+ }
+
+ }
+
+ protected static class RawTag {
+ private int headerSize;
+ protected String name;
+ protected int flag;
+ protected byte[] data;
+
+ private RawTag(
+ int nameLength, int sizeLength, int sizeMultiplier,
+ int flagLength, byte[] frameData, int offset) {
+ headerSize = nameLength + sizeLength + flagLength;
+
+ // Name, normally 3 or 4 bytes
+ name = getString(frameData, offset, nameLength);
+
+ // Size
+ int rawSize;
+ if (sizeLength == 3) {
+ rawSize = getInt3(frameData, offset+nameLength);
+ } else {
+ rawSize = getInt(frameData, offset+nameLength);
+ }
+ int size = rawSize * sizeMultiplier;
+
+ // Flag
+ if (flagLength > 0) {
+ if (flagLength == 1) {
+ flag = (int)frameData[offset+nameLength+sizeLength];
+ } else {
+ flag = getInt2(frameData, offset+nameLength+sizeLength);
+ }
+ }
+
+ // Now data
+ int copyFrom = offset+nameLength+sizeLength+flagLength;
+ size = Math.max(0, Math.min(size, frameData.length-copyFrom)); // TIKA-1218, prevent negative size for malformed files.
+ data = new byte[size];
+ System.arraycopy(frameData, copyFrom, data, 0, size);
+ }
+
+ protected int getSize() {
+ return headerSize + data.length;
+ }
+
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/tika/blob/c7a6bcac/tika-parser-modules/tika-parser-multimedia-module/src/main/java/org/apache/tika/parser/mp3/LyricsHandler.java
----------------------------------------------------------------------
diff --git a/tika-parser-modules/tika-parser-multimedia-module/src/main/java/org/apache/tika/parser/mp3/LyricsHandler.java b/tika-parser-modules/tika-parser-multimedia-module/src/main/java/org/apache/tika/parser/mp3/LyricsHandler.java
index 54b9ae9..12d0f2d 100644
--- a/tika-parser-modules/tika-parser-multimedia-module/src/main/java/org/apache/tika/parser/mp3/LyricsHandler.java
+++ b/tika-parser-modules/tika-parser-multimedia-module/src/main/java/org/apache/tika/parser/mp3/LyricsHandler.java
@@ -1,156 +1,156 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.parser.mp3;
-
-import java.io.IOException;
-import java.io.InputStream;
-
-import org.apache.tika.exception.TikaException;
-import org.xml.sax.ContentHandler;
-import org.xml.sax.SAXException;
-
-import static java.nio.charset.StandardCharsets.US_ASCII;
-import static java.nio.charset.StandardCharsets.UTF_8;
-
-/**
- * This is used to parse Lyrics3 tag information
- * from an MP3 file, if available.
- * Handles lyrics tags of up to 10kb in size.
- * Will process any ID3v1 tag data if present.
- * Ignores extended ID3v1 data in the lyrics block
- *
- * @see <a href="http://www.id3.org/Lyrics3v2">Lyrics3 v2.0 specification</a>
- */
-public class LyricsHandler {
- boolean foundLyrics = false;
- String lyricsText = null;
- ID3v1Handler id3v1 = null;
-
- public LyricsHandler(InputStream stream, ContentHandler handler)
- throws IOException, SAXException, TikaException {
- this(getSuffix(stream, 10240+128));
- }
-
- /**
- * Looks for the Lyrics data, which will be
- * just before the ID3v1 data (if present),
- * and process it.
- * Also sets things up for the ID3v1
- * processing if required.
- * Creates from the last 128 bytes of a stream.
- */
- protected LyricsHandler(byte[] tagData)
- throws IOException, SAXException, TikaException {
- if(tagData.length < 128) {
- return;
- }
-
- // Is there ID3v1 data?
- byte[] last128 = new byte[128];
- System.arraycopy(tagData, tagData.length-128, last128, 0, 128);
- id3v1 = new ID3v1Handler(last128);
-
- if(tagData.length < 137) {
- return;
- }
-
- // Are there lyrics? Look for the closing Lyrics tag
- // at the end to decide if there is any
- int lookat = tagData.length - 9;
- if(id3v1.found) {
- lookat -= 128;
- }
- if(tagData[lookat+0] == 'L' && tagData[lookat+1] == 'Y' &&
- tagData[lookat+2] == 'R' && tagData[lookat+3] == 'I' &&
- tagData[lookat+4] == 'C' && tagData[lookat+5] == 'S' &&
- tagData[lookat+6] == '2' && tagData[lookat+7] == '0' &&
- tagData[lookat+8] == '0') {
- foundLyrics = true;
-
- // The length (6 bytes) comes just before LYRICS200, and is the
- // size including the LYRICSBEGIN but excluding the
- // length+LYRICS200 at the end.
- int length = Integer.parseInt(
- new String(tagData, lookat-6, 6, UTF_8)
- );
-
- String lyrics = new String(
- tagData, lookat-length+5, length-11,
- US_ASCII
- );
-
- // Tags are a 3 letter code, 5 digit length, then data
- int pos = 0;
- while(pos < lyrics.length()-8) {
- String tagName = lyrics.substring(pos, pos+3);
- int tagLen = Integer.parseInt(
- lyrics.substring(pos+3, pos+8)
- );
- int startPos = pos + 8;
- int endPos = startPos + tagLen;
-
- if(tagName.equals("LYR")) {
- lyricsText = lyrics.substring(startPos, endPos);
- }
-
- pos = endPos;
- }
- }
- }
-
- public boolean hasID3v1() {
- if(id3v1 == null || id3v1.found == false) {
- return false;
- }
- return true;
- }
- public boolean hasLyrics() {
- return lyricsText != null && lyricsText.length() > 0;
- }
-
- /**
- * Reads and returns the last <code>length</code> bytes from the
- * given stream.
- * @param stream input stream
- * @param length number of bytes from the end to read and return
- * @return stream the <code>InputStream</code> to read from.
- * @throws IOException if the stream could not be read from.
- */
- protected static byte[] getSuffix(InputStream stream, int length)
- throws IOException {
- byte[] buffer = new byte[2 * length];
- int bytesInBuffer = 0;
-
- int n = stream.read(buffer);
- while (n != -1) {
- bytesInBuffer += n;
- if (bytesInBuffer == buffer.length) {
- System.arraycopy(buffer, bytesInBuffer - length, buffer, 0, length);
- bytesInBuffer = length;
- }
- n = stream.read(buffer, bytesInBuffer, buffer.length - bytesInBuffer);
- }
-
- if (bytesInBuffer < length) {
- length = bytesInBuffer;
- }
-
- byte[] result = new byte[length];
- System.arraycopy(buffer, bytesInBuffer - length, result, 0, length);
- return result;
- }
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.mp3;
+
+import java.io.IOException;
+import java.io.InputStream;
+
+import org.apache.tika.exception.TikaException;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.SAXException;
+
+import static java.nio.charset.StandardCharsets.US_ASCII;
+import static java.nio.charset.StandardCharsets.UTF_8;
+
+/**
+ * This is used to parse Lyrics3 tag information
+ * from an MP3 file, if available.
+ * Handles lyrics tags of up to 10kb in size.
+ * Will process any ID3v1 tag data if present.
+ * Ignores extended ID3v1 data in the lyrics block
+ *
+ * @see <a href="http://www.id3.org/Lyrics3v2">Lyrics3 v2.0 specification</a>
+ */
+public class LyricsHandler {
+ boolean foundLyrics = false;
+ String lyricsText = null;
+ ID3v1Handler id3v1 = null;
+
+ public LyricsHandler(InputStream stream, ContentHandler handler)
+ throws IOException, SAXException, TikaException {
+ this(getSuffix(stream, 10240+128));
+ }
+
+ /**
+ * Looks for the Lyrics data, which will be
+ * just before the ID3v1 data (if present),
+ * and process it.
+ * Also sets things up for the ID3v1
+ * processing if required.
+ * Creates from the trailing bytes of a stream (at least the last 128 are needed for ID3v1).
+ */
+ protected LyricsHandler(byte[] tagData)
+ throws IOException, SAXException, TikaException {
+ if(tagData.length < 128) {
+ return;
+ }
+
+ // Is there ID3v1 data?
+ byte[] last128 = new byte[128];
+ System.arraycopy(tagData, tagData.length-128, last128, 0, 128);
+ id3v1 = new ID3v1Handler(last128);
+
+ if(tagData.length < 137) {
+ return;
+ }
+
+ // Are there lyrics? Look for the closing Lyrics tag
+ // at the end to decide if there is any
+ int lookat = tagData.length - 9;
+ if(id3v1.found) {
+ lookat -= 128;
+ }
+ if(tagData[lookat+0] == 'L' && tagData[lookat+1] == 'Y' &&
+ tagData[lookat+2] == 'R' && tagData[lookat+3] == 'I' &&
+ tagData[lookat+4] == 'C' && tagData[lookat+5] == 'S' &&
+ tagData[lookat+6] == '2' && tagData[lookat+7] == '0' &&
+ tagData[lookat+8] == '0') {
+ foundLyrics = true;
+
+ // The length (6 bytes) comes just before LYRICS200, and is the
+ // size including the LYRICSBEGIN but excluding the
+ // length+LYRICS200 at the end.
+ int length = Integer.parseInt(
+ new String(tagData, lookat-6, 6, UTF_8)
+ );
+
+ String lyrics = new String(
+ tagData, lookat-length+5, length-11,
+ US_ASCII
+ );
+
+ // Tags are a 3 letter code, 5 digit length, then data
+ int pos = 0;
+ while(pos < lyrics.length()-8) {
+ String tagName = lyrics.substring(pos, pos+3);
+ int tagLen = Integer.parseInt(
+ lyrics.substring(pos+3, pos+8)
+ );
+ int startPos = pos + 8;
+ int endPos = startPos + tagLen;
+
+ if(tagName.equals("LYR")) {
+ lyricsText = lyrics.substring(startPos, endPos);
+ }
+
+ pos = endPos;
+ }
+ }
+ }
+
+ public boolean hasID3v1() {
+ if(id3v1 == null || id3v1.found == false) {
+ return false;
+ }
+ return true;
+ }
+ public boolean hasLyrics() {
+ return lyricsText != null && lyricsText.length() > 0;
+ }
+
+ /**
+ * Reads and returns the last <code>length</code> bytes from the
+ * given stream.
+ * @param stream input stream
+ * @param length number of bytes from the end to read and return
+ * @return the last <code>length</code> bytes of the stream, or all of its bytes if it holds fewer
+ * @throws IOException if the stream could not be read from.
+ */
+ protected static byte[] getSuffix(InputStream stream, int length)
+ throws IOException {
+ byte[] buffer = new byte[2 * length];
+ int bytesInBuffer = 0;
+
+ int n = stream.read(buffer);
+ while (n != -1) {
+ bytesInBuffer += n;
+ if (bytesInBuffer == buffer.length) {
+ System.arraycopy(buffer, bytesInBuffer - length, buffer, 0, length);
+ bytesInBuffer = length;
+ }
+ n = stream.read(buffer, bytesInBuffer, buffer.length - bytesInBuffer);
+ }
+
+ if (bytesInBuffer < length) {
+ length = bytesInBuffer;
+ }
+
+ byte[] result = new byte[length];
+ System.arraycopy(buffer, bytesInBuffer - length, result, 0, length);
+ return result;
+ }
+}
http://git-wip-us.apache.org/repos/asf/tika/blob/c7a6bcac/tika-parser-modules/tika-parser-multimedia-module/src/main/java/org/apache/tika/parser/mp3/MP3Frame.java
----------------------------------------------------------------------
diff --git a/tika-parser-modules/tika-parser-multimedia-module/src/main/java/org/apache/tika/parser/mp3/MP3Frame.java b/tika-parser-modules/tika-parser-multimedia-module/src/main/java/org/apache/tika/parser/mp3/MP3Frame.java
index a88265f..923be8a 100644
--- a/tika-parser-modules/tika-parser-multimedia-module/src/main/java/org/apache/tika/parser/mp3/MP3Frame.java
+++ b/tika-parser-modules/tika-parser-multimedia-module/src/main/java/org/apache/tika/parser/mp3/MP3Frame.java
@@ -1,25 +1,25 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.parser.mp3;
-
-
-/**
- * A frame in an MP3 file, such as ID3v2 Tags or some
- * audio.
- */
-public interface MP3Frame {
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.mp3;
+
+
+/**
+ * A frame in an MP3 file, such as ID3v2 Tags or some
+ * audio.
+ */
+public interface MP3Frame {
+}