You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ju...@apache.org on 2010/01/30 20:34:44 UTC

svn commit: r904858 - in /lucene/tika/trunk: tika-core/src/main/java/org/apache/tika/metadata/ tika-parsers/src/main/java/org/apache/tika/parser/audio/ tika-parsers/src/main/java/org/apache/tika/parser/mp3/

Author: jukka
Date: Sat Jan 30 19:34:44 2010
New Revision: 904858

URL: http://svn.apache.org/viewvc?rev=904858&view=rev
Log:
TIKA-199: Improved audio detection and parsing

Add more audio metadata. Introduce XMP-style metadata properties.

Added:
    lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/metadata/Property.java
    lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/metadata/XMPDM.java
Modified:
    lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/metadata/Metadata.java
    lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/audio/AudioParser.java
    lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/audio/MidiParser.java
    lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mp3/Mp3Parser.java

Modified: lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/metadata/Metadata.java
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/metadata/Metadata.java?rev=904858&r1=904857&r2=904858&view=diff
==============================================================================
--- lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/metadata/Metadata.java (original)
+++ lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/metadata/Metadata.java Sat Jan 30 19:34:44 2010
@@ -77,6 +77,17 @@
     }
 
     /**
+     * Returns the value (if any) of the identified metadata property.
+     *
+     * @since Apache Tika 0.7
+     * @param property property definition; its name is used for the lookup
+     * @return property value, or <code>null</code> if the property is not set
+     */
+    public String get(Property property) {
+        return get(property.getName()); // delegate to the name-based getter
+    }
+
+    /**
      * Get the values associated to a metadata name.
      * 
      * @param name
@@ -147,6 +158,17 @@
     }
 
     /**
+     * Sets the value of the identified metadata property.
+     *
+     * @since Apache Tika 0.7
+     * @param property property definition; its name is used as the key
+     * @param value    property value
+     */
+    public void set(Property property, String value) {
+        set(property.getName(), value); // delegate to the name-based setter
+    }
+
+    /**
      * Remove a metadata and all its associated values.
      * 
      * @param name

Added: lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/metadata/Property.java
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/metadata/Property.java?rev=904858&view=auto
==============================================================================
--- lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/metadata/Property.java (added)
+++ lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/metadata/Property.java Sat Jan 30 19:34:44 2010
@@ -0,0 +1,150 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.metadata;
+
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.Set;
+
+/**
+ * XMP property definition. Each instance of this class defines a single
+ * metadata property like "dc:format". In addition to the property name,
+ * the {@link ValueType value type} and category (internal or external)
+ * of the property are included in the property definition. The available
+ * choice values are also stored for open and closed choice value types.
+ *
+ * @since Apache Tika 0.7
+ */
+public final class Property {
+
+    public static enum ValueType { // value types a property definition can declare
+        BOOLEAN, OPEN_CHOICE, CLOSED_CHOICE, DATE, INTEGER, LOCALE,
+        MIME_TYPE, PROPER_NAME, RATIONAL, REAL, TEXT, URI, URL, XPATH
+    }
+
+    private final String name; // property name, e.g. "dc:format"
+
+    private final boolean internal; // true = internal category, false = external
+
+    private final ValueType valueType; // declared type of the property's values
+
+    /**
+     * Unmodifiable choices for open/closed choice types; null if none given.
+     */
+    private final Set<String> choices;
+
+    private Property(
+            String name, boolean internal,
+            ValueType valueType, String[] choices) {
+        this.name = name;
+        this.internal = internal;
+        this.valueType = valueType;
+        if (choices != null) { // copy, so later changes to the array have no effect
+            this.choices = Collections.unmodifiableSet(
+                    new HashSet<String>(Arrays.asList(choices)));
+        } else { // no choices supplied (the three-argument constructor passes null)
+            this.choices = null;
+        }
+    }
+
+    public String getName() { // the name this definition was created with
+        return name;
+    }
+
+    public boolean isInternal() { // true when created by an internal* factory
+        return internal;
+    }
+
+    public boolean isExternal() { // always the negation of isInternal()
+        return !internal;
+    }
+
+    public ValueType getValueType() {
+        return valueType;
+    }
+
+    /**
+     * Returns the (immutable) set of choices for the values of this property.
+     * Only defined for {@link ValueType#OPEN_CHOICE open} and
+     * {@link ValueType#CLOSED_CHOICE closed choice} value types.
+     *
+     * @return available choices, or <code>null</code> if none were defined
+     */
+    public Set<String> getChoices() {
+        return choices;
+    }
+
+    private Property( String name, boolean internal, ValueType valueType) { // no choices
+        this(name, internal, valueType, null);
+    }
+
+    public static Property internalBoolean(String name) { // internal* factories follow
+        return new Property(name, true, ValueType.BOOLEAN);
+    }
+
+    public static Property internalClosedChoise( // "Choise" (sic) is the public name
+            String name, String... choices) {
+        return new Property(name, true, ValueType.CLOSED_CHOICE, choices);
+    }
+
+    public static Property internalDate(String name) {
+        return new Property(name, true, ValueType.DATE);
+    }
+
+    public static Property internalInteger(String name) {
+        return new Property(name, true, ValueType.INTEGER);
+    }
+
+    public static Property internalRational(String name) {
+        return new Property(name, true, ValueType.RATIONAL);
+    }
+
+    public static Property internalOpenChoise(
+            String name, String... choices) {
+        return new Property(name, true, ValueType.OPEN_CHOICE, choices);
+    }
+    public static Property internalReal(String name) {
+        return new Property(name, true, ValueType.REAL);
+    }
+
+    public static Property internalText(String name) {
+        return new Property(name, true, ValueType.TEXT);
+    }
+
+    public static Property internalURI(String name) {
+        return new Property(name, true, ValueType.URI);
+    }
+
+    public static Property externalClosedChoise( // external* factories follow
+            String name, String... choices) {
+        return new Property(name, false, ValueType.CLOSED_CHOICE, choices);
+    }
+
+    public static Property externalDate(String name) {
+        return new Property(name, false, ValueType.DATE);
+    }
+
+    public static Property externalInteger(String name) {
+        return new Property(name, false, ValueType.INTEGER);
+    }
+
+    public static Property externalText(String name) {
+        return new Property(name, false, ValueType.TEXT);
+    }
+
+}

Added: lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/metadata/XMPDM.java
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/metadata/XMPDM.java?rev=904858&view=auto
==============================================================================
--- lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/metadata/XMPDM.java (added)
+++ lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/metadata/XMPDM.java Sat Jan 30 19:34:44 2010
@@ -0,0 +1,361 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.metadata;
+
+/**
+ * XMP Dynamic Media schema. This is a collection of
+ * {@link Property property definition} constants for the dynamic media
+ * properties defined in the XMP standard.
+ *
+ * @since Apache Tika 0.7
+ * @see <a href="http://www.adobe.com/devnet/xmp/pdfs/XMPSpecificationPart2.pdf"
+ *        >XMP Specification, Part 2: Standard Schemas</a>
+ */
+public interface XMPDM {
+
+    /**
+     * "The absolute path to the file's peak audio file. If empty, no peak
+     * file exists."
+     */
+    Property ABS_PEAK_AUDIO_FILE_PATH =
+        Property.internalURI("xmpDM:absPeakAudioFilePath");
+
+    /**
+     * "The name of the album."
+     */
+    Property ALBUM = Property.externalText("xmpDM:album");
+
+    /**
+     * "An alternative tape name, set via the project window or timecode
+     * dialog in Premiere. If an alternative name has been set and has not
+     * been reverted, that name is displayed."
+     */
+    Property ALT_TAPE_NAME = Property.externalText("xmpDM:altTapeName");
+
+//    /**
+//     * "A timecode set by the user. When specified, it is used instead
+//     * of the startTimecode."
+//     */
+//    Property ALT_TIMECODE = "xmpDM:altTimecode";
+
+    /**
+     * "The name of the artist or artists."
+     */
+    Property ARTIST = Property.externalText("xmpDM:artist");
+
+    /**
+     * "The date and time when the audio was last modified."
+     */
+    Property AUDIO_MOD_DATE = Property.internalDate("xmpDM:audioModDate");
+
+    /**
+     * "The audio sample rate. Can be any value, but commonly 32000, 41100,
+     * or 48000." NOTE(review): "41100" is presumably a typo for 44100.
+     */
+    Property AUDIO_SAMPLE_RATE =
+        Property.internalInteger("xmpDM:audioSampleRate");
+
+    /**
+     * "The audio sample type."
+     */
+    Property AUDIO_SAMPLE_TYPE = Property.internalClosedChoise(
+            "xmpDM:audioSampleType", "8Int", "16Int", "32Int", "32Float");
+
+    /**
+     * "The audio channel type."
+     */
+    Property AUDIO_CHANNEL_TYPE = Property.internalClosedChoise(
+            "xmpDM:audioChannelType", "Mono", "Stereo", "5.1", "7.1");
+
+    /**
+     * "The audio compression used. For example, MP3."
+     */
+    Property AUDIO_COMPRESSOR = Property.internalText("xmpDM:audioCompressor");
+
+//    /**
+//     * "Additional parameters for Beat Splice stretch mode."
+//     */
+//    Property BEAT_SPLICE_PARAMS = "xmpDM:beatSpliceParams";
+
+    /**
+     * "The composer's name."
+     */
+    Property COMPOSER = Property.externalText("xmpDM:composer");
+
+//    /**
+//     * "An unordered list of all media used to create this media."
+//     */
+//    Property CONTRIBUTED_MEDIA = "xmpDM:contributedMedia";
+
+    /**
+     * "The copyright information."
+     */
+    Property COPYRIGHT = Property.externalText("xmpDM:copyright");
+
+//    /**
+//     * "The duration of the media file."
+//     */
+//    Property DURATION = "xmpDM:duration";
+
+    /**
+     * "The engineer's name."
+     */
+    Property ENGINEER = Property.externalText("xmpDM:engineer");
+
+    /**
+     * "The file data rate in megabytes per second. For example:
+     * '36/10' = 3.6 MB/sec"
+     */
+    Property FILE_DATA_RATE = Property.internalRational("xmpDM:fileDataRate");
+
+    /**
+     * "The name of the genre."
+     */
+    Property GENRE = Property.externalText("xmpDM:genre");
+
+    /**
+     * "The musical instrument."
+     */
+    Property INSTRUMENT = Property.externalText("xmpDM:instrument");
+
+//    /**
+//     * "The duration of lead time for queuing music."
+//     */
+//    Property INTRO_TIME = "xmpDM:introTime";
+
+    /**
+     * "The audio's musical key."
+     */
+    Property KEY = Property.internalClosedChoise(
+            "xmpDM:key", "C", "C#", "D", "D#", "E", "F", "F#",
+            "G", "G#", "A", "A#", "B");
+
+    /**
+     * "User's log comments."
+     */
+    Property LOG_COMMENT = Property.externalText("xmpDM:logComment");
+
+    /**
+     * "When true, the clip can be looped seamlessly."
+     */
+    Property LOOP = Property.internalBoolean("xmpDM:loop");
+
+    /**
+     * "The number of beats."
+     */
+    Property NUMBER_OF_BEATS = Property.internalReal("xmpDM:numberOfBeats");
+
+//    /**
+//     * An ordered list of markers. See also {@link #TRACKS xmpDM:Tracks}.
+//     */
+//    Property MARKERS = "xmpDM:markers";
+
+    /**
+     * "The date and time when the metadata was last modified."
+     */
+    Property METADATA_MOD_DATE = Property.internalDate("xmpDM:metadataModDate");
+
+//    /**
+//     * "The time at which to fade out."
+//     */
+//    Property OUT_CUE = "xmpDM:outCue";
+
+//    /**
+//     * "A reference to the project that created this file."
+//     */
+//    Property PROJECT_REF = "xmpDM:projectRef"; 
+
+    /**
+     * "The sampling phase of film to be converted to video (pull-down)."
+     */
+    Property PULL_DOWN = Property.internalClosedChoise(
+            "xmpDM:pullDown", "WSSWW", "SSWWW", "SWWWS", "WWWSS", "WWSSW",
+            "WSSWW_24p", "SSWWW_24p", "SWWWS_24p", "WWWSS_24p", "WWSSW_24p");
+
+    /**
+     * "The relative path to the file's peak audio file. If empty, no peak
+     * file exists."
+     */
+    Property RELATIVE_PEAK_AUDIO_FILE_PATH =
+        Property.internalURI("xmpDM:relativePeakAudioFilePath");
+
+//    /**
+//     * "The start time of the media inside the audio project."
+//     */
+//    Property RELATIVE_TIMESTAMP = "xmpDM:relativeTimestamp";
+
+    /**
+     * "The date the title was released."
+     */
+    Property RELEASE_DATE = Property.externalDate("xmpDM:releaseDate");
+
+//    /**
+//     * "Additional parameters for Resample stretch mode."
+//     */
+//    Property RESAMPLE_PARAMS = "xmpDM:resampleParams";
+
+    /**
+     * "The musical scale used in the music. 'Neither' is most often used
+     * for instruments with no associated scale, such as drums."
+     */
+    Property SCALE_TYPE = Property.internalClosedChoise(
+            "xmpDM:scaleType", "Major", "Minor", "Both", "Neither");
+
+    /**
+     * "The name of the scene."
+     */
+    Property SCENE = Property.externalText("xmpDM:scene");
+
+    /**
+     * "The date and time when the video was shot."
+     */
+    Property SHOT_DATE = Property.externalDate("xmpDM:shotDate");
+
+    /**
+     * "The name of the location where the video was shot. For example:
+     * 'Oktoberfest, Munich, Germany'. For more accurate positioning,
+     * use the EXIF GPS values."
+     */
+    Property SHOT_LOCATION = Property.externalText("xmpDM:shotLocation");
+
+    /**
+     * "The name of the shot or take."
+     */
+    Property SHOT_NAME = Property.externalText("xmpDM:shotName");
+
+    /**
+     * "A description of the speaker angles from center front in degrees.
+     * For example: 'Left = -30, Right = 30, Center = 0, LFE = 45,
+     * Left Surround = -110, Right Surround = 110'"
+     */
+    Property SPEAKER_PLACEMENT =
+        Property.externalText("xmpDM:speakerPlacement");
+
+//    /**
+//     * "The timecode of the first frame of video in the file, as obtained
+//     * from the device control."
+//     */
+//    Property START_TIMECODE = "xmpDM:startTimecode";
+
+    /**
+     * "The audio stretch mode."
+     */
+    Property STRETCH_MODE = Property.internalClosedChoise(
+            "xmpDM:stretchMode", "Fixed length", "Time-Scale", "Resample",
+            "Beat Splice", "Hybrid");
+
+    /**
+     * "The name of the tape from which the clip was captured, as set during
+     * the capture process."
+     */
+    Property TAPE_NAME = Property.externalText("xmpDM:tapeName");
+
+    /**
+     * "The audio's tempo."
+     */
+    Property TEMPO = Property.internalReal("xmpDM:tempo");
+
+//    /**
+//     * "Additional parameters for Time-Scale stretch mode."
+//     */
+//    Property TIME_SCALE_PARAMS = "xmpDM:timeScaleParams";
+
+    /**
+     * "The time signature of the music."
+     */
+    Property TIME_SIGNATURE = Property.internalClosedChoise(
+            "xmpDM:timeSignature", "2/4", "3/4", "4/4", "5/4", "7/4",
+            "6/8", "9/8", "12/8", "other");
+
+    /**
+     * "A numeric value indicating the order of the audio file within its
+     * original recording."
+     */
+    Property TRACK_NUMBER = Property.externalInteger("xmpDM:trackNumber");
+
+//    /**
+//     * "An unordered list of tracks. A track is a named set of markers,
+//     * which can specify a frame rate for all markers in the set.
+//     * See also {@link #MARKERS xmpDM:markers}."
+//     */
+//    Property TRACKS = "xmpDM:Tracks";
+
+    /**
+     * "The alpha mode."
+     */
+    Property VIDEO_ALPHA_MODE = Property.externalClosedChoise(
+            "xmpDM:videoAlphaMode", "straight", "pre-multiplied");
+
+//    /**
+//     * "A color in CMYK or RGB to be used as the pre-multiple color when
+//     * alpha mode is pre-multiplied."
+//     */
+//    Property VIDEO_ALPHA_PREMULTIPLE_COLOR = "xmpDM:videoAlphaPremultipleColor";
+
+    /**
+     * "When true, unity is clear, when false, it is opaque."
+     */
+    Property VIDEO_ALPHA_UNITY_IS_TRANSPARENT =
+        Property.internalBoolean("xmpDM:videoAlphaUnityIsTransparent");
+
+    /**
+     * "The color space."
+     */
+    Property VIDEO_COLOR_SPACE = Property.internalClosedChoise(
+            "xmpDM:videoColorSpace", "sRGB", "CCIR-601", "CCIR-709");
+
+    /**
+     * "Video compression used. For example, jpeg."
+     */
+    Property VIDEO_COMPRESSOR = Property.internalText("xmpDM:videoCompressor");
+
+    /**
+     * "The field order for video."
+     */
+    Property VIDEO_FIELD_ORDER = Property.internalClosedChoise(
+            "xmpDM:videoFieldOrder", "Upper", "Lower", "Progressive");
+
+    /**
+     * "The video frame rate."
+     */
+    Property VIDEO_FRAME_RATE = Property.internalOpenChoise(
+            "xmpDM:videoFrameRate", "24", "NTSC", "PAL");
+
+//    /**
+//     * "The frame size. For example: w:720, h: 480, unit:pixels"
+//     */
+//    Property VIDEO_FRAME_SIZE = "xmpDM:videoFrameSize";
+
+    /**
+     * "The date and time when the video was last modified."
+     */
+    Property VIDEO_MOD_DATE = Property.internalDate("xmpDM:videoModDate");
+
+    /**
+     * "The size in bits of each color component of a pixel. Standard
+     * Windows 32-bit pixels have 8 bits per component."
+     */
+    Property VIDEO_PIXEL_DEPTH = Property.internalClosedChoise(
+            "xmpDM:videoPixelDepth", "8Int", "16Int", "32Int", "32Float");
+
+    /**
+     * "The aspect ratio, expressed as wd/ht. For example: '648/720' = 0.9"
+     */
+    Property VIDEO_PIXEL_ASPECT_RATIO =
+        Property.internalRational("xmpDM:videoPixelAspectRatio");
+
+}

Modified: lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/audio/AudioParser.java
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/audio/AudioParser.java?rev=904858&r1=904857&r2=904858&view=diff
==============================================================================
--- lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/audio/AudioParser.java (original)
+++ lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/audio/AudioParser.java Sat Jan 30 19:34:44 2010
@@ -21,12 +21,15 @@
 import java.io.InputStream;
 import java.util.Map.Entry;
 
+import javax.sound.sampled.AudioFileFormat;
 import javax.sound.sampled.AudioFormat;
 import javax.sound.sampled.AudioSystem;
 import javax.sound.sampled.UnsupportedAudioFileException;
+import javax.sound.sampled.AudioFileFormat.Type;
 
 import org.apache.tika.exception.TikaException;
 import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.XMPDM;
 import org.apache.tika.parser.ParseContext;
 import org.apache.tika.parser.Parser;
 import org.apache.tika.sax.XHTMLContentHandler;
@@ -39,31 +42,46 @@
             InputStream stream, ContentHandler handler,
             Metadata metadata, ParseContext context)
             throws IOException, SAXException, TikaException {
-        XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
-        xhtml.startDocument();
-
         // AudioSystem expects the stream to support the mark feature
-        InputStream buffered = new BufferedInputStream(stream);
+        if (!stream.markSupported()) {
+            stream = new BufferedInputStream(stream);
+        }
         try {
-            AudioFormat format =
-                AudioSystem.getAudioFileFormat(buffered).getFormat();
-
-            float rate = format.getSampleRate();
-            if (rate != AudioSystem.NOT_SPECIFIED) {
-                metadata.set("samplerate", String.valueOf(rate));
+            AudioFileFormat fileFormat = AudioSystem.getAudioFileFormat(stream);
+            Type type = fileFormat.getType();
+            if (type == Type.AIFC || type == Type.AIFF) {
+                metadata.set(Metadata.CONTENT_TYPE, "audio/x-aiff");
+            } else if (type == Type.AU || type == Type.SND) {
+                metadata.set(Metadata.CONTENT_TYPE, "audio/basic");
+            } else if (type == Type.WAVE) {
+                metadata.set(Metadata.CONTENT_TYPE, "audio/x-wav");
             }
 
-            int channels = format.getChannels();
+            AudioFormat audioFormat = fileFormat.getFormat();
+            int channels = audioFormat.getChannels();
             if (channels != AudioSystem.NOT_SPECIFIED) {
                 metadata.set("channels", String.valueOf(channels));
+                // TODO: Use XMPDM.TRACKS? (see also frame rate in AudioFormat)
             }
-
-            int bits = format.getSampleSizeInBits();
+            float rate = audioFormat.getSampleRate();
+            if (rate != AudioSystem.NOT_SPECIFIED) {
+                metadata.set("samplerate", String.valueOf(rate));
+                metadata.set(
+                        XMPDM.AUDIO_SAMPLE_RATE,
+                        Integer.toString((int) rate));
+            }
+            int bits = audioFormat.getSampleSizeInBits();
             if (bits != AudioSystem.NOT_SPECIFIED) {
                 metadata.set("bits", String.valueOf(bits));
+                if (bits == 8) {
+                    metadata.set(XMPDM.AUDIO_SAMPLE_TYPE, "8Int");
+                } else if (bits == 16) {
+                    metadata.set(XMPDM.AUDIO_SAMPLE_TYPE, "16Int");
+                } else if (bits == 32) {
+                    metadata.set(XMPDM.AUDIO_SAMPLE_TYPE, "32Int");
+                }
             }
-
-            metadata.set("encoding", format.getEncoding().toString());
+            metadata.set("encoding", audioFormat.getEncoding().toString());
 
             // Javadoc suggests that some of the following properties might
             // be available, but I had no success in finding any:
@@ -75,7 +93,10 @@
             // "date" Date date of the recording or release
             // "comment" String an arbitrary text
 
-            for (Entry<String, Object> entry : format.properties().entrySet()) {
+            for (Entry<String, Object> entry : fileFormat.properties().entrySet()) {
+                metadata.set(entry.getKey(), entry.getValue().toString());
+            }
+            for (Entry<String, Object> entry : audioFormat.properties().entrySet()) {
                 metadata.set(entry.getKey(), entry.getValue().toString());
             }
         } catch (UnsupportedAudioFileException e) {
@@ -84,6 +105,8 @@
             // just being unsupported. So we do nothing.
         }
 
+        XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
+        xhtml.startDocument();
         xhtml.endDocument();
     }
 

Modified: lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/audio/MidiParser.java
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/audio/MidiParser.java?rev=904858&r1=904857&r2=904858&view=diff
==============================================================================
--- lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/audio/MidiParser.java (original)
+++ lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/audio/MidiParser.java Sat Jan 30 19:34:44 2010
@@ -42,6 +42,8 @@
             InputStream stream, ContentHandler handler,
             Metadata metadata, ParseContext context)
             throws IOException, SAXException, TikaException {
+        metadata.set(Metadata.CONTENT_TYPE, "audio/midi");
+
         XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
         xhtml.startDocument();
 
@@ -52,6 +54,7 @@
 
             Track[] tracks = sequence.getTracks();
             metadata.set("tracks", String.valueOf(tracks.length));
+            // TODO: Use XMPDM.TRACKS?
 
             Patch[] patches = sequence.getPatchList();
             metadata.set("patches", String.valueOf(patches.length));

Modified: lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mp3/Mp3Parser.java
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mp3/Mp3Parser.java?rev=904858&r1=904857&r2=904858&view=diff
==============================================================================
--- lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mp3/Mp3Parser.java (original)
+++ lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mp3/Mp3Parser.java Sat Jan 30 19:34:44 2010
@@ -23,6 +23,7 @@
 
 import org.apache.tika.exception.TikaException;
 import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.XMPDM;
 import org.apache.tika.parser.ParseContext;
 import org.apache.tika.parser.Parser;
 import org.apache.tika.sax.XHTMLContentHandler;
@@ -56,6 +57,11 @@
 
            metadata.set(Metadata.TITLE, tag.getTitle());
            metadata.set(Metadata.AUTHOR, tag.getArtist());
+           metadata.set(XMPDM.ARTIST, tag.getArtist());
+           metadata.set(XMPDM.ALBUM, tag.getAlbum());
+           metadata.set(XMPDM.RELEASE_DATE, tag.getYear());
+           metadata.set(XMPDM.GENRE, tag.getGenre());
+           metadata.set(XMPDM.LOG_COMMENT, tag.getComment());
 
            xhtml.element("h1", tag.getTitle());
            xhtml.element("p", tag.getArtist());
@@ -63,6 +69,7 @@
             // ID3v1.1 Track addition
             if (tag.getTrackNumber() != null) {
                 xhtml.element("p", tag.getAlbum() + ", track " + tag.getTrackNumber());
+                metadata.set(XMPDM.TRACK_NUMBER, tag.getTrackNumber());
             } else {
                 xhtml.element("p", tag.getAlbum());
             }
@@ -74,6 +81,9 @@
             metadata.set("samplerate", String.valueOf(audioAndTags.audio.getSampleRate()));
             metadata.set("channels", String.valueOf(audioAndTags.audio.getChannels()));
             metadata.set("version", audioAndTags.audio.getVersion());
+            metadata.set(
+                    XMPDM.AUDIO_SAMPLE_RATE,
+                    Integer.toString(audioAndTags.audio.getSampleRate()));
         }
 
         xhtml.endDocument();