You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2021/06/15 16:09:39 UTC

[tika] branch main updated: TIKA-3412 --

This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git


The following commit(s) were added to refs/heads/main by this push:
     new dbebeb5  TIKA-3412 --
dbebeb5 is described below

commit dbebeb5ebc6aa139bc3660850d1f9947fb711c46
Author: tallison <ta...@apache.org>
AuthorDate: Tue Jun 15 12:09:14 2021 -0400

    TIKA-3412 --
---
 CHANGES.txt                                        |   2 +
 tika-parent/pom.xml                                |   2 +-
 .../tika-parser-audiovideo-module/pom.xml          |   5 +
 .../mp4/{MP4Parser.java => LegacyMP4Parser.java}   |   2 +-
 .../java/org/apache/tika/parser/mp4/MP4Parser.java | 351 ++++++++-------------
 .../apache/tika/parser/mp4/TikaMp4BoxHandler.java  |  63 ++++
 .../tika/parser/mp4/boxes/TikaUserDataBox.java     | 247 +++++++++++++++
 ...MP4ParserTest.java => LegacyMP4ParserTest.java} |  10 +-
 .../org/apache/tika/parser/mp4/MP4ParserTest.java  |  17 +-
 .../tika-parser-image-module/pom.xml               |   2 +-
 10 files changed, 458 insertions(+), 243 deletions(-)

diff --git a/CHANGES.txt b/CHANGES.txt
index 84be502..098bad2 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -35,6 +35,8 @@ Release 2.0.0-ALPHA - 01/13/2021
        what has been changed in the config object.
      * We are now using non-shaded versions of xmpcore with namespaces com.adobe.internal.*
        vs com.adobe.*.
+     * We switched the underlying MP4 parser to Drew Noakes' metadata-extractor MP4 parser
+       from sannies' isoparser.
 
    * tika-parsers
      * The parser modules have been broken into three main modules:
diff --git a/tika-parent/pom.xml b/tika-parent/pom.xml
index 8e52aa3..5cf2d85 100644
--- a/tika-parent/pom.xml
+++ b/tika-parent/pom.xml
@@ -328,7 +328,7 @@
     <log4j2.version>2.14.1</log4j2.version>
     <lombok.version>1.18.20</lombok.version>
     <lucene.version>8.8.2</lucene.version>
-    <metadata.extractor.version>2.15.0.1</metadata.extractor.version>
+    <metadata.extractor.version>2.16.0</metadata.extractor.version>
     <microsoft.translator.version>0.6.2</microsoft.translator.version>
     <mime4j.version>0.8.4</mime4j.version>
     <mockito.version>3.7.7</mockito.version>
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-audiovideo-module/pom.xml b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-audiovideo-module/pom.xml
index c968347..64a7aab 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-audiovideo-module/pom.xml
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-audiovideo-module/pom.xml
@@ -34,6 +34,11 @@
       <artifactId>isoparser</artifactId>
       <version>${isoparser.version}</version>
     </dependency>
+    <dependency>
+      <groupId>com.drewnoakes</groupId>
+      <artifactId>metadata-extractor</artifactId>
+      <version>${metadata.extractor.version}</version>
+    </dependency>
 
   </dependencies>
   <build>
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-audiovideo-module/src/main/java/org/apache/tika/parser/mp4/MP4Parser.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-audiovideo-module/src/main/java/org/apache/tika/parser/mp4/LegacyMP4Parser.java
similarity index 99%
copy from tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-audiovideo-module/src/main/java/org/apache/tika/parser/mp4/MP4Parser.java
copy to tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-audiovideo-module/src/main/java/org/apache/tika/parser/mp4/LegacyMP4Parser.java
index e9e21c4..5a2936c 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-audiovideo-module/src/main/java/org/apache/tika/parser/mp4/MP4Parser.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-audiovideo-module/src/main/java/org/apache/tika/parser/mp4/LegacyMP4Parser.java
@@ -81,7 +81,7 @@ import org.apache.tika.sax.XHTMLContentHandler;
  * This uses the MP4Parser project from http://code.google.com/p/mp4parser/
  * to do the underlying parsing
  */
-public class MP4Parser extends AbstractParser {
+public class LegacyMP4Parser extends AbstractParser {
     /**
      * Serial version UID
      */
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-audiovideo-module/src/main/java/org/apache/tika/parser/mp4/MP4Parser.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-audiovideo-module/src/main/java/org/apache/tika/parser/mp4/MP4Parser.java
index e9e21c4..00861e7 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-audiovideo-module/src/main/java/org/apache/tika/parser/mp4/MP4Parser.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-audiovideo-module/src/main/java/org/apache/tika/parser/mp4/MP4Parser.java
@@ -16,12 +16,16 @@
  */
 package org.apache.tika.parser.mp4;
 
+import java.io.File;
 import java.io.IOException;
 import java.io.InputStream;
+import java.nio.file.Files;
 import java.text.DecimalFormat;
 import java.text.NumberFormat;
 import java.util.Arrays;
+import java.util.Collection;
 import java.util.Collections;
+import java.util.Date;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Locale;
@@ -29,50 +33,30 @@ import java.util.Map;
 import java.util.Optional;
 import java.util.Set;
 
-import org.mp4parser.Box;
-import org.mp4parser.Container;
-import org.mp4parser.IsoFile;
-import org.mp4parser.boxes.apple.AppleAlbumBox;
-import org.mp4parser.boxes.apple.AppleArtist2Box;
-import org.mp4parser.boxes.apple.AppleArtistBox;
-import org.mp4parser.boxes.apple.AppleCommentBox;
-import org.mp4parser.boxes.apple.AppleCompilationBox;
-import org.mp4parser.boxes.apple.AppleDiskNumberBox;
-import org.mp4parser.boxes.apple.AppleEncoderBox;
-import org.mp4parser.boxes.apple.AppleGPSCoordinatesBox;
-import org.mp4parser.boxes.apple.AppleGenreBox;
-import org.mp4parser.boxes.apple.AppleItemListBox;
-import org.mp4parser.boxes.apple.AppleNameBox;
-import org.mp4parser.boxes.apple.AppleRecordingYear2Box;
-import org.mp4parser.boxes.apple.AppleTrackAuthorBox;
-import org.mp4parser.boxes.apple.AppleTrackNumberBox;
-import org.mp4parser.boxes.apple.Utf8AppleDataBox;
-import org.mp4parser.boxes.iso14496.part12.FileTypeBox;
-import org.mp4parser.boxes.iso14496.part12.MetaBox;
-import org.mp4parser.boxes.iso14496.part12.MovieBox;
-import org.mp4parser.boxes.iso14496.part12.MovieHeaderBox;
-import org.mp4parser.boxes.iso14496.part12.SampleDescriptionBox;
-import org.mp4parser.boxes.iso14496.part12.SampleTableBox;
-import org.mp4parser.boxes.iso14496.part12.TrackBox;
-import org.mp4parser.boxes.iso14496.part12.TrackHeaderBox;
-import org.mp4parser.boxes.iso14496.part12.UserDataBox;
-import org.mp4parser.boxes.sampleentry.AudioSampleEntry;
+import com.drew.imaging.mp4.Mp4Reader;
+import com.drew.metadata.Directory;
+import com.drew.metadata.MetadataException;
+import com.drew.metadata.Tag;
+import com.drew.metadata.mp4.Mp4BoxHandler;
+import com.drew.metadata.mp4.Mp4Directory;
+import com.drew.metadata.mp4.media.Mp4SoundDirectory;
+import com.drew.metadata.mp4.media.Mp4VideoDirectory;
 import org.xml.sax.ContentHandler;
 import org.xml.sax.SAXException;
 
-import org.apache.tika.config.Field;
+import org.apache.tika.exception.RuntimeSAXException;
 import org.apache.tika.exception.TikaException;
 import org.apache.tika.io.TemporaryResources;
 import org.apache.tika.io.TikaInputStream;
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.metadata.Property;
 import org.apache.tika.metadata.TikaCoreProperties;
-import org.apache.tika.metadata.XMP;
 import org.apache.tika.metadata.XMPDM;
 import org.apache.tika.mime.MediaType;
 import org.apache.tika.parser.AbstractParser;
 import org.apache.tika.parser.ParseContext;
 import org.apache.tika.sax.XHTMLContentHandler;
+import org.apache.tika.utils.StringUtils;
 
 /**
  * Parser for the MP4 media container format, as well as the older
@@ -116,24 +100,6 @@ public class MP4Parser extends AbstractParser {
 
     private ISO6709Extractor iso6709Extractor = new ISO6709Extractor();
 
-    private static void addMetadata(Property prop, Metadata m, Utf8AppleDataBox metadata) {
-        if (metadata != null) {
-            m.set(prop, metadata.getValue());
-        }
-    }
-
-    private static <T extends Box> T getOrNull(Container box, Class<T> clazz) {
-        if (box == null) {
-            return null;
-        }
-
-        List<T> boxes = box.getBoxes(clazz);
-        if (boxes.size() == 0) {
-            return null;
-        }
-        return boxes.get(0);
-    }
-
     public Set<MediaType> getSupportedTypes(ParseContext context) {
         return SUPPORTED_TYPES;
     }
@@ -141,228 +107,157 @@ public class MP4Parser extends AbstractParser {
     public void parse(InputStream stream, ContentHandler handler, Metadata metadata,
                       ParseContext context) throws IOException, SAXException, TikaException {
 
-        // The MP4Parser library accepts either a File, or a byte array
-        // As MP4 video files are typically large, always use a file to
-        //  avoid OOMs that may occur with in-memory buffering
         TemporaryResources tmp = new TemporaryResources();
         TikaInputStream tstream = TikaInputStream.get(stream, tmp);
 
-        try (IsoFile isoFile = new IsoFile(tstream.getFile())) {
-
-            // Grab the file type box
-            FileTypeBox fileType = getOrNull(isoFile, FileTypeBox.class);
-            if (fileType != null) {
-                // Identify the type based on the major brand
-                Optional<MediaType> typeHolder = typesMap.entrySet().stream()
-                        .filter(e -> e.getValue().contains(fileType.getMajorBrand())).findFirst()
-                        .map(Map.Entry::getKey);
-
-                if (!typeHolder.isPresent()) {
-                    // If no match for major brand, see if any of the compatible brands match
-                    typeHolder = typesMap.entrySet().stream().filter(e -> e.getValue().stream()
-                            .anyMatch(fileType.getCompatibleBrands()::contains)).findFirst()
-                            .map(Map.Entry::getKey);
-                }
-
-                MediaType type = typeHolder.orElse(MediaType.application("mp4"));
-                metadata.set(Metadata.CONTENT_TYPE, type.toString());
-
-                if (type.getType().equals("audio")) {
-                    metadata.set(XMPDM.AUDIO_COMPRESSOR, fileType.getMajorBrand().trim());
-                }
-            } else {
-                // Some older QuickTime files lack the FileType
-                metadata.set(Metadata.CONTENT_TYPE, "video/quicktime");
-            }
-
-
-            // Get the main MOOV box
-            MovieBox moov = getOrNull(isoFile, MovieBox.class);
-            if (moov == null) {
-                // Bail out
-                return;
-            }
-
+        try (InputStream is = Files.newInputStream(tstream.getPath())) {
 
             XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
             xhtml.startDocument();
-
-            handleMovieHeaderBox(moov, metadata, xhtml);
-            handleTrackBoxes(moov, metadata, xhtml);
-
-            // Get metadata from the User Data Box
-            UserDataBox userData = getOrNull(moov, UserDataBox.class);
-            if (userData != null) {
-                extractGPS(userData, metadata);
-                MetaBox metaBox = getOrNull(userData, MetaBox.class);
-
-                // Check for iTunes Metadata
-                // See http://atomicparsley.sourceforge.net/mpeg-4files.html and
-                //  http://code.google.com/p/mp4v2/wiki/iTunesMetadata for more on these
-                handleApple(metaBox, metadata, xhtml);
-                // TODO Check for other kinds too
+            com.drew.metadata.Metadata mp4Metadata = new com.drew.metadata.Metadata();
+            Mp4BoxHandler boxHandler = new TikaMp4BoxHandler(mp4Metadata, metadata, xhtml);
+            try {
+                Mp4Reader.extract(is, boxHandler);
+            } catch (RuntimeSAXException e) {
+                throw (SAXException) e.getCause();
             }
-
-            // All done
+            //TODO -- figure out how to get IOExceptions out of boxhandler. Mp4Reader
+            //currently swallows IOExceptions.
+            processMp4Directories(mp4Metadata.getDirectoriesOfType(Mp4Directory.class), metadata);
             xhtml.endDocument();
-
         } finally {
             tmp.dispose();
         }
-
     }
 
-    private void handleTrackBoxes(MovieBox moov, Metadata metadata, XHTMLContentHandler xhtml) {
-
-        // Get some more information from the track header
-        // TODO Decide how to handle multiple tracks
-        List<TrackBox> tb = moov.getBoxes(TrackBox.class);
-        if (tb == null || tb.size() == 0) {
-            return;
+    private void processMp4Directories(Collection<Mp4Directory> mp4Directories, Metadata metadata) {
+        for (Mp4Directory mp4Directory : mp4Directories) {
+            if (mp4Directory instanceof Mp4SoundDirectory) {
+                processMp4SoundDirectory((Mp4SoundDirectory) mp4Directory, metadata);
+            } else if (mp4Directory instanceof Mp4VideoDirectory) {
+                processMp4VideoDirectory((Mp4VideoDirectory) mp4Directory, metadata);
+            } else {
+                processActualMp4Directory(mp4Directory, metadata);
+            }
         }
-        TrackBox track = tb.get(0);
-
-        TrackHeaderBox header = track.getTrackHeaderBox();
-        // Get the creation and modification dates
-        metadata.set(TikaCoreProperties.CREATED, header.getCreationTime());
-        metadata.set(TikaCoreProperties.MODIFIED, header.getModificationTime());
+    }
 
-        // Get the video with and height
-        metadata.set(Metadata.IMAGE_WIDTH, (int) header.getWidth());
-        metadata.set(Metadata.IMAGE_LENGTH, (int) header.getHeight());
+    private void processMp4VideoDirectory(Mp4VideoDirectory mp4Directory, Metadata metadata) {
+        //todo
+    }
 
-        // Get the sample information
-        SampleTableBox samples = track.getSampleTableBox();
-        if (samples != null) {
-            SampleDescriptionBox sampleDesc = samples.getSampleDescriptionBox();
-            if (sampleDesc != null) {
-                // Look for the first Audio Sample, if present
-                AudioSampleEntry sample = getOrNull(sampleDesc, AudioSampleEntry.class);
-                if (sample != null) {
-                    XMPDM.ChannelTypePropertyConverter
-                            .convertAndSet(metadata, sample.getChannelCount());
-                    //metadata.set(XMPDM.AUDIO_SAMPLE_TYPE, sample.getSampleSize());
-                    // TODO Num -> Type mapping
-                    metadata.set(XMPDM.AUDIO_SAMPLE_RATE, (int) sample.getSampleRate());
-                    //metadata.set(XMPDM.AUDIO_, sample.getSamplesPerPacket());
-                    //metadata.set(XMPDM.AUDIO_, sample.getBytesPerSample());
-                }
+    private void processMp4SoundDirectory(Mp4SoundDirectory mp4SoundDirectory,
+                                        Metadata metadata) {
+        addInt(mp4SoundDirectory, metadata, Mp4SoundDirectory.TAG_AUDIO_SAMPLE_RATE,
+                XMPDM.AUDIO_SAMPLE_RATE);
+
+        try {
+            int numChannels = mp4SoundDirectory.getInt(Mp4SoundDirectory.TAG_NUMBER_OF_CHANNELS);
+            if (numChannels == 1) {
+                metadata.set(XMPDM.AUDIO_CHANNEL_TYPE, "Mono");
+            } else if (numChannels == 2) {
+                metadata.set(XMPDM.AUDIO_CHANNEL_TYPE, "Stereo");
+            } else {
+                //??? log
             }
+        } catch (MetadataException e) {
+            //log
         }
     }
 
-    private void handleMovieHeaderBox(MovieBox moov, Metadata metadata, XHTMLContentHandler xhtml) {
-        // Pull out some information from the header box
-        MovieHeaderBox mHeader = getOrNull(moov, MovieHeaderBox.class);
-        if (mHeader == null) {
-            return;
+    private void addInt(Mp4Directory mp4Directory, Metadata metadata, int tag,
+                        Property property) {
+        try {
+            int val = mp4Directory.getInt(tag);
+            metadata.set(property, val);
+        } catch (MetadataException e) {
+            //log
         }
-        // Get the creation and modification dates
-        metadata.set(TikaCoreProperties.CREATED, mHeader.getCreationTime());
-        metadata.set(TikaCoreProperties.MODIFIED, mHeader.getModificationTime());
-
-        // Get the duration
-        double durationSeconds = ((double) mHeader.getDuration()) / mHeader.getTimescale();
-        metadata.set(XMPDM.DURATION, DURATION_FORMAT.format(durationSeconds));
-
-        // The timescale is normally the sampling rate
-        metadata.set(XMPDM.AUDIO_SAMPLE_RATE, (int) mHeader.getTimescale());
     }
 
-    private void handleApple(MetaBox metaBox, Metadata metadata, XHTMLContentHandler xhtml)
-            throws SAXException {
-        AppleItemListBox apple = getOrNull(metaBox, AppleItemListBox.class);
-        if (apple == null) {
-            return;
-        }
-        // Title
-        AppleNameBox title = getOrNull(apple, AppleNameBox.class);
-        addMetadata(TikaCoreProperties.TITLE, metadata, title);
+    private void processActualMp4Directory(Mp4Directory mp4Directory, Metadata metadata) {
+        addDate(mp4Directory, metadata, Mp4Directory.TAG_CREATION_TIME, TikaCoreProperties.CREATED);
+        addDate(mp4Directory, metadata, Mp4Directory.TAG_MODIFICATION_TIME,
+                TikaCoreProperties.MODIFIED);
+        handleBrands(mp4Directory, metadata);
+        handleDurationInSeconds(mp4Directory, metadata);
 
-        // Artist
-        AppleArtistBox artist = getOrNull(apple, AppleArtistBox.class);
-        addMetadata(TikaCoreProperties.CREATOR, metadata, artist);
-        addMetadata(XMPDM.ARTIST, metadata, artist);
+        addDouble(mp4Directory, metadata, Mp4Directory.TAG_LATITUDE, TikaCoreProperties.LATITUDE);
+        addDouble(mp4Directory, metadata, Mp4Directory.TAG_LONGITUDE, TikaCoreProperties.LONGITUDE);
 
-        // Album Artist
-        AppleArtist2Box artist2 = getOrNull(apple, AppleArtist2Box.class);
-        addMetadata(XMPDM.ALBUM_ARTIST, metadata, artist2);
-
-        // Album
-        AppleAlbumBox album = getOrNull(apple, AppleAlbumBox.class);
-        addMetadata(XMPDM.ALBUM, metadata, album);
-
-        // Composer
-        AppleTrackAuthorBox composer = getOrNull(apple, AppleTrackAuthorBox.class);
-        addMetadata(XMPDM.COMPOSER, metadata, composer);
-
-        // Genre
-        AppleGenreBox genre = getOrNull(apple, AppleGenreBox.class);
-        addMetadata(XMPDM.GENRE, metadata, genre);
+    }
 
-        // Year
-        AppleRecordingYear2Box year = getOrNull(apple, AppleRecordingYear2Box.class);
-        if (year != null) {
-            metadata.set(XMPDM.RELEASE_DATE, year.getValue());
+    private void handleDurationInSeconds(Mp4Directory mp4Directory, Metadata metadata) {
+        String durationInSeconds = mp4Directory.getString(Mp4Directory.TAG_DURATION_SECONDS);
+        if (durationInSeconds == null) {
+            return;
         }
-
-        // Track number
-        AppleTrackNumberBox trackNum = getOrNull(apple, AppleTrackNumberBox.class);
-        if (trackNum != null) {
-            metadata.set(XMPDM.TRACK_NUMBER, trackNum.getA());
-            //metadata.set(XMPDM.NUMBER_OF_TRACKS, trackNum.getB()); // TODO
+        if (! durationInSeconds.contains("/")) {
+            return;
         }
-
-        // Disc number
-        AppleDiskNumberBox discNum = getOrNull(apple, AppleDiskNumberBox.class);
-        if (discNum != null) {
-            metadata.set(XMPDM.DISC_NUMBER, discNum.getA());
+        String[] bits = durationInSeconds.split("/");
+        if (bits.length != 2) {
+            return;
         }
-
-        // Compilation
-        AppleCompilationBox compilation = getOrNull(apple, AppleCompilationBox.class);
-        if (compilation != null) {
-            metadata.set(XMPDM.COMPILATION, (int) compilation.getValue());
+        double durationSeconds;
+        try {
+            long numerator = Long.parseLong(bits[0]);
+            long denominator = Long.parseLong(bits[1]);
+            durationSeconds = (double)numerator/(double)denominator;
+        } catch (NumberFormatException e) {
+            //log
+            return;
         }
+        // Get the duration
+        metadata.set(XMPDM.DURATION, DURATION_FORMAT.format(durationSeconds));
+    }
 
-        // Comment
-        AppleCommentBox comment = getOrNull(apple, AppleCommentBox.class);
-        addMetadata(XMPDM.LOG_COMMENT, metadata, comment);
+    private void handleBrands(Mp4Directory mp4Directory, Metadata metadata) {
 
-        // Encoder
-        AppleEncoderBox encoder = getOrNull(apple, AppleEncoderBox.class);
-        if (encoder != null) {
-            metadata.set(XMP.CREATOR_TOOL, encoder.getValue());
-        }
 
+        String majorBrand = mp4Directory.getString(Mp4Directory.TAG_MAJOR_BRAND);
+        // Identify the type based on the major brand
+        Optional<MediaType> typeHolder = typesMap.entrySet().stream()
+                .filter(e -> e.getValue().contains(majorBrand)).findFirst()
+                .map(Map.Entry::getKey);
 
-        // As text
-        for (Box box : apple.getBoxes()) {
-            if (box instanceof Utf8AppleDataBox) {
-                xhtml.element("p", ((Utf8AppleDataBox) box).getValue());
+        if (!typeHolder.isPresent()) {
+            String compatibleBrands =
+                    mp4Directory.getString(Mp4Directory.TAG_COMPATIBLE_BRANDS);
+            if (compatibleBrands != null) {
+                // If no match for major brand, see if any of the compatible brands match
+                typeHolder = typesMap.entrySet().stream().filter(e ->
+                        e.getValue().stream().anyMatch(compatibleBrands::contains))
+                        .findFirst().map(Map.Entry::getKey);
             }
         }
+        MediaType type = typeHolder.orElse(MediaType.application("mp4"));
+        metadata.set(Metadata.CONTENT_TYPE, type.toString());
+        if (type.getType().equals("audio") && ! StringUtils.isBlank(majorBrand)) {
+            metadata.set(XMPDM.AUDIO_COMPRESSOR, majorBrand.trim());
+        }
 
     }
 
-    /**
-     * Override the maximum record size limit.  NOTE: this
-     * sets a static variable on the IsoFile and affects all files
-     * parsed in this JVM!!!
-     *
-     * @param maxRecordSize
-     */
-    @Field
-    public void setMaxRecordSize(long maxRecordSize) {
-        IsoFile.MAX_RECORD_SIZE_OVERRIDE = maxRecordSize;
+    private void addDate(Mp4Directory mp4Directory, Metadata metadata, int tag,
+                         Property property) {
+        Date d = mp4Directory.getDate(tag);
+        if (d == null) {
+            return;
+        }
+        metadata.set(property, d);
+
     }
 
-    private void extractGPS(UserDataBox userData, Metadata metadata) {
-        AppleGPSCoordinatesBox coordBox = getOrNull(userData, AppleGPSCoordinatesBox.class);
-        if (coordBox == null) {
+    private void addDouble(Directory mp4Directory, Metadata metadata, int tag,
+                           Property property) {
+        try {
+            double val = mp4Directory.getDouble(tag);
+            metadata.set(property, val);
+        } catch (MetadataException e) {
+            //log
             return;
         }
-        String iso6709 = coordBox.getValue();
-        iso6709Extractor.extract(iso6709, metadata);
+
     }
 }
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-audiovideo-module/src/main/java/org/apache/tika/parser/mp4/TikaMp4BoxHandler.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-audiovideo-module/src/main/java/org/apache/tika/parser/mp4/TikaMp4BoxHandler.java
new file mode 100644
index 0000000..6cb6ccc
--- /dev/null
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-audiovideo-module/src/main/java/org/apache/tika/parser/mp4/TikaMp4BoxHandler.java
@@ -0,0 +1,63 @@
+package org.apache.tika.parser.mp4;
+
+import java.io.IOException;
+
+import com.drew.imaging.mp4.Mp4Handler;
+import com.drew.lang.annotations.NotNull;
+import com.drew.lang.annotations.Nullable;
+import com.drew.metadata.Metadata;
+import com.drew.metadata.mp4.Mp4BoxHandler;
+import com.drew.metadata.mp4.Mp4Context;
+import com.drew.metadata.mp4.boxes.Box;
+import org.xml.sax.SAXException;
+
+import org.apache.tika.parser.mp4.boxes.TikaUserDataBox;
+import org.apache.tika.sax.XHTMLContentHandler;
+
+public class TikaMp4BoxHandler extends Mp4BoxHandler {
+
+    org.apache.tika.metadata.Metadata tikaMetadata;
+    final XHTMLContentHandler xhtml;
+    public TikaMp4BoxHandler(Metadata metadata, org.apache.tika.metadata.Metadata tikaMetadata,
+                             XHTMLContentHandler xhtml) {
+        super(metadata);
+        this.tikaMetadata = tikaMetadata;
+        this.xhtml = xhtml;
+    }
+
+    @Override
+    public boolean shouldAcceptBox(@NotNull Box box) {
+        if (box.type.equals("udta")) {
+            return true;
+        }
+        return super.shouldAcceptBox(box);
+    }
+
+    @Override
+    public boolean shouldAcceptContainer(@NotNull Box box) {
+        return super.shouldAcceptContainer(box);
+    }
+
+    @Override
+    public Mp4Handler<?> processBox(@NotNull Box box, @Nullable byte[] payload, Mp4Context context)
+            throws IOException {
+        if (box.type.equals("udta")) {
+            return processUserData(box, payload, context);
+        }
+
+        return super.processBox(box, payload, context);
+    }
+
+
+    private Mp4Handler<?> processUserData(Box box, byte[] payload, Mp4Context context) throws IOException {
+        if (payload == null) {
+            return this;
+        }
+        try {
+            new TikaUserDataBox(box, payload, tikaMetadata, xhtml).addMetadata(directory);
+        } catch (SAXException e) {
+            throw new IOException(e);
+        }
+        return this;
+    }
+}
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-audiovideo-module/src/main/java/org/apache/tika/parser/mp4/boxes/TikaUserDataBox.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-audiovideo-module/src/main/java/org/apache/tika/parser/mp4/boxes/TikaUserDataBox.java
new file mode 100644
index 0000000..c76b3e0
--- /dev/null
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-audiovideo-module/src/main/java/org/apache/tika/parser/mp4/boxes/TikaUserDataBox.java
@@ -0,0 +1,247 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.mp4.boxes;
+
+import java.io.IOException;
+import java.nio.charset.StandardCharsets;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import com.drew.lang.SequentialByteArrayReader;
+import com.drew.lang.SequentialReader;
+import com.drew.lang.annotations.NotNull;
+import com.drew.lang.annotations.Nullable;
+import com.drew.metadata.mp4.Mp4Directory;
+import com.drew.metadata.mp4.boxes.Box;
+import org.xml.sax.SAXException;
+
+import org.apache.tika.exception.RuntimeSAXException;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.TikaCoreProperties;
+import org.apache.tika.metadata.XMP;
+import org.apache.tika.metadata.XMPDM;
+import org.apache.tika.sax.XHTMLContentHandler;
+
+/**
+ * Reads the payload of a user data box. The location entry ("(c)xyz") is kept so that
+ * {@link #addMetadata(Mp4Directory)} can add latitude and longitude to a directory; the items
+ * of an item list ("ilst") found inside a "meta" child are written to the Tika
+ * {@link Metadata} and, for most textual items, to the XHTML handler as paragraphs.
+ */
+public class TikaUserDataBox extends Box {
+
+    private static final String LOCATION_CODE = "\u00A9xyz";
+    private static final Pattern COORDINATE_PATTERN =
+            Pattern.compile("([+-]\\d+\\.\\d+)([+-]\\d+\\.\\d+)");
+    //a box starts with a 32-bit size, which includes these 8 bytes, and a four character type
+    private static final long BOX_HEADER_LENGTH = 8L;
+
+    @Nullable
+    private String coordinateString;
+
+    private final Metadata metadata;
+    private final XHTMLContentHandler xhtml;
+
+    /**
+     * Parses the payload immediately; metadata and XHTML are written as a side effect.
+     *
+     * @param box      the user data box that the payload belongs to
+     * @param payload  bytes of the user data box
+     * @param metadata Tika metadata to add the item list values to
+     * @param xhtml    handler that receives textual values as paragraphs
+     * @throws IOException  if the payload ends before a value that is being read
+     * @throws SAXException declared for callers; failures while writing XHTML are currently
+     *                      rethrown as {@link RuntimeSAXException}
+     */
+    public TikaUserDataBox(@NotNull Box box, byte[] payload, Metadata metadata,
+                           XHTMLContentHandler xhtml) throws IOException, SAXException {
+        super(box);
+        this.metadata = metadata;
+        this.xhtml = xhtml;
+        long length = payload.length;
+        SequentialReader reader = new SequentialByteArrayReader(payload);
+        while (reader.getPosition() < length) {
+            long boxStart = reader.getPosition();
+            long size = reader.getUInt32();
+            if (size <= 4L) {
+                break;
+            }
+            String kindName = reader.getString(4, StandardCharsets.ISO_8859_1);
+            if (LOCATION_CODE.equals(kindName)) {
+                int xyzLength = reader.getUInt16();
+                reader.skip(2L);
+                this.coordinateString = reader.getString(xyzLength, StandardCharsets.UTF_8);
+            } else if ("meta".equals(kindName)) {
+                //the handler type is the sixth four-byte field after the "meta" tag;
+                //the five fields before it are not needed
+                reader.skip(20L);
+                String subtype = reader.getString(4, StandardCharsets.ISO_8859_1);
+                parseUserDataBox(reader, subtype);
+                //continue with the next sibling rather than with whatever the parsing above
+                //left unread; a declared end beyond the payload cannot be trusted
+                long boxEnd = boxStart + size;
+                long unread = boxEnd - reader.getPosition();
+                if (unread > 0 && boxEnd <= length) {
+                    reader.skip(unread);
+                }
+            } else {
+                if (size < BOX_HEADER_LENGTH) {
+                    return;
+                }
+
+                reader.skip(size - BOX_HEADER_LENGTH);
+            }
+        }
+
+    }
+
+    /**
+     * Looks for an item list after the handler type of a "meta" box and processes it.
+     * Nothing is read unless the handler type is "mdir".
+     *
+     * @param reader      positioned directly after the handler type
+     * @param handlerType handler type of the "meta" box
+     */
+    private void parseUserDataBox(SequentialReader reader, String handlerType)
+            throws IOException {
+        if (! "mdir".equals(handlerType)) {
+            return;
+        }
+        String mdirType = reader.getString(4, StandardCharsets.ISO_8859_1);
+        if (! "appl".equals(mdirType)) {
+            return;
+        }
+        reader.skip(10L);//not sure what these bytes are
+        long len = reader.getUInt32();
+        if (len >= Integer.MAX_VALUE || len <= 0) {
+            //log
+            return;
+        }
+        String subType = reader.getString(4, StandardCharsets.ISO_8859_1);
+        if ("ilst".equals(subType)) {
+            //len counts the size and type that were just read; the records fill the rest
+            processIList(reader, len - BOX_HEADER_LENGTH);
+        }
+    }
+
+    /**
+     * Reads the records of an item list. Parsing stops silently at the first record whose
+     * declared lengths make no sense.
+     *
+     * @param reader   positioned at the first record
+     * @param totalLen number of bytes occupied by the records
+     */
+    private void processIList(SequentialReader reader, long totalLen)
+            throws IOException {
+
+        long totalRead = 0;
+        while (totalRead < totalLen) {
+            long recordLen = reader.getUInt32();
+            String fieldName = reader.getString(4, StandardCharsets.ISO_8859_1);
+            long fieldLen = reader.getUInt32();
+            String typeName = reader.getString(4, StandardCharsets.ISO_8859_1);//data
+            totalRead += 16;
+            if (! "data".equals(typeName)) {
+                //not a value that is understood here: jump to the end of the record
+                long toSkip = recordLen - 16;
+                if (toSkip <= 0 || toSkip > Integer.MAX_VALUE) {
+                    //log?
+                    return;
+                }
+                reader.skip(toSkip);
+                totalRead += toSkip;
+                continue;
+            }
+            reader.skip(8);//not sure what these are
+            totalRead += 8;
+            long toRead = fieldLen - 16;
+            if (toRead <= 0 || toRead > Integer.MAX_VALUE) {
+                //log?
+                return;
+            }
+            readValue(reader, fieldName, (int) toRead);
+            totalRead += toRead;
+        }
+    }
+
+    /**
+     * Consumes exactly {@code toRead} bytes, the value of one item, and stores the value if
+     * the item is one that is understood. Consuming the full length keeps the reader in step
+     * with the byte count kept by the caller even when a value is longer than expected.
+     *
+     * @param reader    positioned at the first byte of the value
+     * @param fieldName four character name of the item
+     * @param toRead    length of the value in bytes, at least 1
+     */
+    private void readValue(SequentialReader reader, String fieldName, int toRead)
+            throws IOException {
+        if ("covr".equals(fieldName)) {
+            //covr can be an image file, e.g. png or jpeg
+            //skip this for now
+            reader.skip(toRead);
+        } else if ("cpil".equals(fieldName)) {
+            int compilationId = (int) reader.getByte();
+            metadata.set(XMPDM.COMPILATION, compilationId);
+            reader.skip(toRead - 1L);
+        } else if ("trkn".equals(fieldName)) {
+            if (toRead == 8) {
+                long trackNumber = reader.getUInt32();
+                reader.skip(4L);//second number is not used
+                metadata.set(XMPDM.TRACK_NUMBER, (int) trackNumber);
+            } else {
+                //log
+                reader.skip(toRead);
+            }
+        } else if ("disk".equals(fieldName)) {
+            if (toRead >= 4) {
+                int discNumber = reader.getInt32();
+                reader.skip(toRead - 4L);//bytes after the disc number are not used
+                metadata.set(XMPDM.DISC_NUMBER, discNumber);
+            } else {
+                //log
+                reader.skip(toRead);
+            }
+        } else {
+            String val = reader.getString(toRead, StandardCharsets.UTF_8);
+            try {
+                addMetadata(fieldName, val);
+            } catch (SAXException e) {
+                //need to punch through IOException catching in MP4Reader
+                throw new RuntimeSAXException(e);
+            }
+        }
+    }
+
+    /**
+     * Maps a textual item to Tika metadata; most items are also written to the XHTML
+     * handler as a paragraph. Items with unknown names are ignored.
+     *
+     * @param key   four character name of the item
+     * @param value text of the item
+     * @throws SAXException if the XHTML handler fails
+     */
+    private void addMetadata(String key, String value) throws SAXException {
+        switch (key) {
+            case "\u00A9nam":
+                metadata.set(TikaCoreProperties.TITLE, value);
+                xhtml.element("p", value);
+                break;
+            case "\u00A9too":
+                metadata.set(XMP.CREATOR_TOOL, value);
+                break;
+            case "\u00A9ART" :
+                metadata.set(XMPDM.ARTIST, value);
+                metadata.set(TikaCoreProperties.CREATOR, value);
+                xhtml.element("p", value);
+                break;
+            case "aART" :
+                metadata.set(XMPDM.ALBUM_ARTIST, value);
+                xhtml.element("p", value);
+                break;
+            case "\u00A9wrt":
+                metadata.set(XMPDM.COMPOSER, value);
+                xhtml.element("p", value);
+                break;
+            case "\u00A9alb":
+                metadata.set(XMPDM.ALBUM, value);
+                xhtml.element("p", value);
+                break;
+            case "\u00A9gen" :
+                metadata.set(XMPDM.GENRE, value);
+                xhtml.element("p", value);
+                break;
+            case "\u00A9day" :
+                //this can be a year "2008" or a date "2017-04-26T07:00:00Z"
+                metadata.set(XMPDM.RELEASE_DATE, value);
+                xhtml.element("p", value);
+                break;
+            case "\u00A9cmt" :
+                metadata.set(XMPDM.LOG_COMMENT, value);
+                xhtml.element("p", value);
+                break;
+            case "cprt" :
+                metadata.set(XMPDM.COPYRIGHT, value);
+                xhtml.element("p", value);
+                break;
+            case "xid " :
+                //not sure this is the right use of this key
+                //e.g. SonyBMG:isrc:KRA031208874
+                metadata.set(XMP.IDENTIFIER, value);
+                break;
+            default:
+                //not handled yet:
+                //purd date?
+                //ownr ? and apID
+                //flvr ?
+                //son = nam, soal = (c)alb soar = aART?
+                break;
+        }
+    }
+
+    /**
+     * Adds latitude and longitude to the directory if the payload contained a location entry
+     * that matches the coordinate pattern; otherwise the directory is left untouched.
+     *
+     * @param directory directory to add the coordinates to
+     */
+    public void addMetadata(Mp4Directory directory) {
+        if (this.coordinateString == null) {
+            return;
+        }
+        Matcher matcher = COORDINATE_PATTERN.matcher(this.coordinateString);
+        if (matcher.find()) {
+            double latitude = Double.parseDouble(matcher.group(1));
+            double longitude = Double.parseDouble(matcher.group(2));
+            directory.setDouble(Mp4Directory.TAG_LATITUDE, latitude);
+            directory.setDouble(Mp4Directory.TAG_LONGITUDE, longitude);
+        }
+    }
+}
+
+
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-audiovideo-module/src/test/java/org/apache/tika/parser/mp4/MP4ParserTest.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-audiovideo-module/src/test/java/org/apache/tika/parser/mp4/LegacyMP4ParserTest.java
similarity index 93%
copy from tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-audiovideo-module/src/test/java/org/apache/tika/parser/mp4/MP4ParserTest.java
copy to tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-audiovideo-module/src/test/java/org/apache/tika/parser/mp4/LegacyMP4ParserTest.java
index ee9c9e4..f00394c 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-audiovideo-module/src/test/java/org/apache/tika/parser/mp4/MP4ParserTest.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-audiovideo-module/src/test/java/org/apache/tika/parser/mp4/LegacyMP4ParserTest.java
@@ -18,6 +18,8 @@ package org.apache.tika.parser.mp4;
 
 import static org.junit.Assert.assertEquals;
 
+import java.util.Arrays;
+
 import org.junit.Test;
 import org.xml.sax.ContentHandler;
 
@@ -34,7 +36,7 @@ import org.apache.tika.sax.BodyContentHandler;
 /**
  * Test case for parsing mp4 files.
  */
-public class MP4ParserTest extends TikaTest {
+public class LegacyMP4ParserTest extends TikaTest {
     /**
      * Test that we can extract information from
      * a M4A MP4 Audio file
@@ -42,7 +44,8 @@ public class MP4ParserTest extends TikaTest {
     @Test
     public void testMP4ParsingAudio() throws Exception {
         Metadata metadata = new Metadata();
-        String content = getText("testMP4.m4a", metadata);
+
+        String content = getText("testMP4.m4a", new LegacyMP4Parser(), metadata);
 
         // Check core properties
         assertEquals("audio/mp4", metadata.get(Metadata.CONTENT_TYPE));
@@ -79,6 +82,9 @@ public class MP4ParserTest extends TikaTest {
 
         assertEquals("iTunes 10.5.3.3", metadata.get(XMP.CREATOR_TOOL));
 
+        assertContains("org.apache.tika.parser.mp4.LegacyMP4Parser",
+                Arrays.asList(metadata.getValues(TikaCoreProperties.TIKA_PARSED_BY)));
+
 
         // Check again by file, rather than stream
         TikaInputStream tstream =
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-audiovideo-module/src/test/java/org/apache/tika/parser/mp4/MP4ParserTest.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-audiovideo-module/src/test/java/org/apache/tika/parser/mp4/MP4ParserTest.java
index ee9c9e4..14dd97e 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-audiovideo-module/src/test/java/org/apache/tika/parser/mp4/MP4ParserTest.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-audiovideo-module/src/test/java/org/apache/tika/parser/mp4/MP4ParserTest.java
@@ -18,11 +18,12 @@ package org.apache.tika.parser.mp4;
 
 import static org.junit.Assert.assertEquals;
 
+import java.util.Arrays;
+
 import org.junit.Test;
 import org.xml.sax.ContentHandler;
 
 import org.apache.tika.TikaTest;
-import org.apache.tika.exception.TikaException;
 import org.apache.tika.io.TikaInputStream;
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.metadata.TikaCoreProperties;
@@ -79,6 +80,8 @@ public class MP4ParserTest extends TikaTest {
 
         assertEquals("iTunes 10.5.3.3", metadata.get(XMP.CREATOR_TOOL));
 
+        assertContains("org.apache.tika.parser.mp4.MP4Parser",
+                Arrays.asList(metadata.getValues(TikaCoreProperties.TIKA_PARSED_BY)));
 
         // Check again by file, rather than stream
         TikaInputStream tstream =
@@ -97,14 +100,8 @@ public class MP4ParserTest extends TikaTest {
     // TODO Test an old QuickTime Video File
     @Test(timeout = 30000)
     public void testInfiniteLoop() throws Exception {
-        //test that a truncated mp4 doesn't cause an infinite loop
-        //TIKA-1931 and TIKA-1924
-        try {
-            XMLResult r = getXML("testMP4_truncated.m4a");
-            assertEquals("audio/mp4", r.metadata.get(Metadata.CONTENT_TYPE));
-            assertEquals("M4A", r.metadata.get(XMPDM.AUDIO_COMPRESSOR));
-        } catch (TikaException e) {
-            //java 11
-        }
+        XMLResult r = getXML("testMP4_truncated.m4a");
+        assertEquals("audio/mp4", r.metadata.get(Metadata.CONTENT_TYPE));
+        assertEquals("M4A", r.metadata.get(XMPDM.AUDIO_COMPRESSOR));
     }
 }
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-image-module/pom.xml b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-image-module/pom.xml
index 5bc6310..9e99b12 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-image-module/pom.xml
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-image-module/pom.xml
@@ -32,7 +32,7 @@
     <dependency>
       <groupId>com.drewnoakes</groupId>
       <artifactId>metadata-extractor</artifactId>
-      <version>2.16.0</version>
+      <version>${metadata.extractor.version}</version>
     </dependency>
     <dependency>
       <groupId>${project.groupId}</groupId>