You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2022/07/14 18:31:03 UTC

[tika] branch main updated: TIKA-3812 -- test should work whether or not ffmpeg and exiftool are installed.

This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git


The following commit(s) were added to refs/heads/main by this push:
     new 9dc592f95 TIKA-3812 -- test should work whether or not ffmpeg and exiftool are installed.
9dc592f95 is described below

commit 9dc592f95200885af75671f0e0381770d8b8298f
Author: tallison <ta...@apache.org>
AuthorDate: Thu Jul 14 14:30:46 2022 -0400

    TIKA-3812 -- test should work whether or not ffmpeg and exiftool are installed.
---
 .../parser/scientific/integration/TestParsers.java | 22 +++++++++++++++++++---
 .../src/test/resources/2.4.1-no-tesseract.txt      |  4 +---
 .../src/test/resources/2.4.1-tesseract.txt         |  4 +---
 3 files changed, 21 insertions(+), 9 deletions(-)

diff --git a/tika-parsers/tika-parsers-extended/tika-parser-scientific-package/src/test/java/org/apache/tika/parser/scientific/integration/TestParsers.java b/tika-parsers/tika-parsers-extended/tika-parser-scientific-package/src/test/java/org/apache/tika/parser/scientific/integration/TestParsers.java
index ccd1c2074..7123e2edb 100644
--- a/tika-parsers/tika-parsers-extended/tika-parser-scientific-package/src/test/java/org/apache/tika/parser/scientific/integration/TestParsers.java
+++ b/tika-parsers/tika-parsers-extended/tika-parser-scientific-package/src/test/java/org/apache/tika/parser/scientific/integration/TestParsers.java
@@ -18,8 +18,10 @@
 package org.apache.tika.parser.scientific.integration;
 
 import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertTrue;
 
 import java.io.BufferedReader;
+import java.io.IOException;
 import java.io.InputStreamReader;
 import java.nio.charset.StandardCharsets;
 import java.util.HashMap;
@@ -27,9 +29,12 @@ import java.util.Map;
 
 import org.junit.jupiter.api.Test;
 
+import org.apache.tika.exception.TikaException;
 import org.apache.tika.mime.MediaType;
+import org.apache.tika.parser.CompositeParser;
 import org.apache.tika.parser.DefaultParser;
 import org.apache.tika.parser.Parser;
+import org.apache.tika.parser.external.CompositeExternalParser;
 import org.apache.tika.parser.ocr.TesseractOCRParser;
 
 /**
@@ -54,6 +59,11 @@ public class TestParsers {
         }
 
         int checked = 0;
+        //The initial lists were developed with exiftool installed.  We have since
+        //modified the 2.4.1-* files to act as if no exiftool is installed.
+        //However, on systems with ffmpeg or exiftool installed, we need
+        //to override those file formats
+        CompositeParser externalParser = (CompositeParser) new CompositeExternalParser();
         try (BufferedReader reader =
                      new BufferedReader(new InputStreamReader(
                              getClass().getResourceAsStream(path241),
@@ -63,15 +73,21 @@ public class TestParsers {
                 String[] data = line.split("\t");
                 String mediaType = data[0];
                 String parserClass = data[1];
-                assertEquals(currentDefault.get(mediaType), parserClass);
+
+                Parser external = externalParser.getParsers().get(MediaType.parse(mediaType));
+                if (external != null) {
+                    parserClass = externalParser.getClass().toString();
+                }
+                assertEquals(parserClass, currentDefault.get(mediaType),
+                        "for mediaType '" + mediaType + "'");
                 checked++;
                 line = reader.readLine();
             }
         }
-        assertEquals(358, checked);
+        assertTrue(checked > 340);
     }
 
-    private Map<String, String> getDefault() {
+    private Map<String, String> getDefault() throws IOException, TikaException {
         DefaultParser p = new DefaultParser();
         Map<String, String> ret = new HashMap<>();
         for (Map.Entry<MediaType, Parser> e : p.getParsers().entrySet()) {
diff --git a/tika-parsers/tika-parsers-extended/tika-parser-scientific-package/src/test/resources/2.4.1-no-tesseract.txt b/tika-parsers/tika-parsers-extended/tika-parser-scientific-package/src/test/resources/2.4.1-no-tesseract.txt
index f0bfb8cff..8a964bc71 100644
--- a/tika-parsers/tika-parsers-extended/tika-parser-scientific-package/src/test/resources/2.4.1-no-tesseract.txt
+++ b/tika-parsers/tika-parsers-extended/tika-parser-scientific-package/src/test/resources/2.4.1-no-tesseract.txt
@@ -333,15 +333,13 @@ text/x-java-source	class org.apache.tika.parser.code.SourceCodeParser
 video/3gpp	class org.apache.tika.parser.mp4.MP4Parser
 video/3gpp2	class org.apache.tika.parser.mp4.MP4Parser
 video/daala	class org.gagravarr.tika.OggParser
-video/mp4	class org.apache.tika.parser.external.CompositeExternalParser
-video/mpeg	class org.apache.tika.parser.external.CompositeExternalParser
+video/mp4	class org.apache.tika.parser.mp4.MP4Parser
 video/ogg	class org.gagravarr.tika.OggParser
 video/quicktime	class org.apache.tika.parser.mp4.MP4Parser
 video/theora	class org.gagravarr.tika.TheoraParser
 video/x-dirac	class org.gagravarr.tika.OggParser
 video/x-flv	class org.apache.tika.parser.video.FLVParser
 video/x-m4v	class org.apache.tika.parser.mp4.MP4Parser
-video/x-msvideo	class org.apache.tika.parser.external.CompositeExternalParser
 video/x-oggrgb	class org.gagravarr.tika.OggParser
 video/x-ogguvs	class org.gagravarr.tika.OggParser
 video/x-oggyuv	class org.gagravarr.tika.OggParser
diff --git a/tika-parsers/tika-parsers-extended/tika-parser-scientific-package/src/test/resources/2.4.1-tesseract.txt b/tika-parsers/tika-parsers-extended/tika-parser-scientific-package/src/test/resources/2.4.1-tesseract.txt
index 439716dfe..69f696f85 100644
--- a/tika-parsers/tika-parsers-extended/tika-parser-scientific-package/src/test/resources/2.4.1-tesseract.txt
+++ b/tika-parsers/tika-parsers-extended/tika-parser-scientific-package/src/test/resources/2.4.1-tesseract.txt
@@ -343,15 +343,13 @@ text/x-java-source	class org.apache.tika.parser.code.SourceCodeParser
 video/3gpp	class org.apache.tika.parser.mp4.MP4Parser
 video/3gpp2	class org.apache.tika.parser.mp4.MP4Parser
 video/daala	class org.gagravarr.tika.OggParser
-video/mp4	class org.apache.tika.parser.external.CompositeExternalParser
-video/mpeg	class org.apache.tika.parser.external.CompositeExternalParser
+video/mp4	class org.apache.tika.parser.mp4.MP4Parser
 video/ogg	class org.gagravarr.tika.OggParser
 video/quicktime	class org.apache.tika.parser.mp4.MP4Parser
 video/theora	class org.gagravarr.tika.TheoraParser
 video/x-dirac	class org.gagravarr.tika.OggParser
 video/x-flv	class org.apache.tika.parser.video.FLVParser
 video/x-m4v	class org.apache.tika.parser.mp4.MP4Parser
-video/x-msvideo	class org.apache.tika.parser.external.CompositeExternalParser
 video/x-oggrgb	class org.gagravarr.tika.OggParser
 video/x-ogguvs	class org.gagravarr.tika.OggParser
 video/x-oggyuv	class org.gagravarr.tika.OggParser