You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ju...@apache.org on 2009/02/08 22:16:12 UTC
svn commit: r742169 - in /lucene/tika/trunk: CHANGES.txt
src/main/java/org/apache/tika/parser/audio/MidiParser.java
Author: jukka
Date: Sun Feb 8 21:16:12 2009
New Revision: 742169
URL: http://svn.apache.org/viewvc?rev=742169&view=rev
Log:
TIKA-201: Extract lyrics and other text from MIDI audio files
MIDI meta text events are now extracted and written out in a <p> element per track.
Modified:
lucene/tika/trunk/CHANGES.txt
lucene/tika/trunk/src/main/java/org/apache/tika/parser/audio/MidiParser.java
Modified: lucene/tika/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/CHANGES.txt?rev=742169&r1=742168&r2=742169&view=diff
==============================================================================
--- lucene/tika/trunk/CHANGES.txt (original)
+++ lucene/tika/trunk/CHANGES.txt Sun Feb 8 21:16:12 2009
@@ -22,6 +22,9 @@
--text argument. This prevents the end of the text output from being
lost. (TIKA-179)
+ * Embedded text in MIDI files is now extracted. For example, many karaoke
+ files contain song lyrics embedded as MIDI text.
+
See http://tinyurl.com/tika-0-3-changes for a list of all changes in Tika 0.3.
The following people have contributed to Tika 0.3 by submitting or commenting
Modified: lucene/tika/trunk/src/main/java/org/apache/tika/parser/audio/MidiParser.java
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/src/main/java/org/apache/tika/parser/audio/MidiParser.java?rev=742169&r1=742168&r2=742169&view=diff
==============================================================================
--- lucene/tika/trunk/src/main/java/org/apache/tika/parser/audio/MidiParser.java (original)
+++ lucene/tika/trunk/src/main/java/org/apache/tika/parser/audio/MidiParser.java Sun Feb 8 21:16:12 2009
@@ -21,6 +21,8 @@
import java.io.InputStream;
import javax.sound.midi.InvalidMidiDataException;
+import javax.sound.midi.MetaMessage;
+import javax.sound.midi.MidiMessage;
import javax.sound.midi.MidiSystem;
import javax.sound.midi.Patch;
import javax.sound.midi.Sequence;
@@ -66,6 +68,23 @@
} else if (type == Sequence.SMPTE_24) {
metadata.set("divisionType", String.valueOf(type));
}
+
+ for (Track track : tracks) {
+ xhtml.startElement("p");
+ for (int i = 0; i < track.size(); i++) {
+ MidiMessage message = track.get(i).getMessage();
+ if (message instanceof MetaMessage) {
+ MetaMessage meta = (MetaMessage) message;
+ // Types 1-15 are reserved for text events
+ if (meta.getType() >= 1 && meta.getType() <= 15) {
+ // FIXME: What's the encoding?
+ xhtml.characters(
+ new String(meta.getData(), "ISO-8859-1"));
+ }
+ }
+ }
+ xhtml.endElement("p");
+ }
} catch (InvalidMidiDataException ignore) {
// There is no way to know whether this exception was
// caused by the document being corrupted or by the format