You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2022/07/14 18:31:03 UTC
[tika] branch main updated: TIKA-3812 -- test should work whether or not ffmpeg and exiftool are installed.
This is an automated email from the ASF dual-hosted git repository.
tallison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git
The following commit(s) were added to refs/heads/main by this push:
new 9dc592f95 TIKA-3812 -- test should work whether or not ffmpeg and exiftool are installed.
9dc592f95 is described below
commit 9dc592f95200885af75671f0e0381770d8b8298f
Author: tallison <ta...@apache.org>
AuthorDate: Thu Jul 14 14:30:46 2022 -0400
TIKA-3812 -- test should work whether or not ffmpeg and exiftool are installed.
---
.../parser/scientific/integration/TestParsers.java | 22 +++++++++++++++++++---
.../src/test/resources/2.4.1-no-tesseract.txt | 4 +---
.../src/test/resources/2.4.1-tesseract.txt | 4 +---
3 files changed, 21 insertions(+), 9 deletions(-)
diff --git a/tika-parsers/tika-parsers-extended/tika-parser-scientific-package/src/test/java/org/apache/tika/parser/scientific/integration/TestParsers.java b/tika-parsers/tika-parsers-extended/tika-parser-scientific-package/src/test/java/org/apache/tika/parser/scientific/integration/TestParsers.java
index ccd1c2074..7123e2edb 100644
--- a/tika-parsers/tika-parsers-extended/tika-parser-scientific-package/src/test/java/org/apache/tika/parser/scientific/integration/TestParsers.java
+++ b/tika-parsers/tika-parsers-extended/tika-parser-scientific-package/src/test/java/org/apache/tika/parser/scientific/integration/TestParsers.java
@@ -18,8 +18,10 @@
package org.apache.tika.parser.scientific.integration;
import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertTrue;
import java.io.BufferedReader;
+import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.HashMap;
@@ -27,9 +29,12 @@ import java.util.Map;
import org.junit.jupiter.api.Test;
+import org.apache.tika.exception.TikaException;
import org.apache.tika.mime.MediaType;
+import org.apache.tika.parser.CompositeParser;
import org.apache.tika.parser.DefaultParser;
import org.apache.tika.parser.Parser;
+import org.apache.tika.parser.external.CompositeExternalParser;
import org.apache.tika.parser.ocr.TesseractOCRParser;
/**
@@ -54,6 +59,11 @@ public class TestParsers {
}
int checked = 0;
+ //The initial lists were developed with exiftool installed. We have since
+ //modified the 2.4.1-* files to act as if no exiftool is installed.
+ //However, on systems with ffmpeg or exiftool installed, we need
+ //to override those file formats
+ CompositeParser externalParser = (CompositeParser) new CompositeExternalParser();
try (BufferedReader reader =
new BufferedReader(new InputStreamReader(
getClass().getResourceAsStream(path241),
@@ -63,15 +73,21 @@ public class TestParsers {
String[] data = line.split("\t");
String mediaType = data[0];
String parserClass = data[1];
- assertEquals(currentDefault.get(mediaType), parserClass);
+
+ Parser external = externalParser.getParsers().get(MediaType.parse(mediaType));
+ if (external != null) {
+ parserClass = externalParser.getClass().toString();
+ }
+ assertEquals(parserClass, currentDefault.get(mediaType),
+ "for mediaType '" + mediaType + "'");
checked++;
line = reader.readLine();
}
}
- assertEquals(358, checked);
+ assertTrue(checked > 340);
}
- private Map<String, String> getDefault() {
+ private Map<String, String> getDefault() throws IOException, TikaException {
DefaultParser p = new DefaultParser();
Map<String, String> ret = new HashMap<>();
for (Map.Entry<MediaType, Parser> e : p.getParsers().entrySet()) {
diff --git a/tika-parsers/tika-parsers-extended/tika-parser-scientific-package/src/test/resources/2.4.1-no-tesseract.txt b/tika-parsers/tika-parsers-extended/tika-parser-scientific-package/src/test/resources/2.4.1-no-tesseract.txt
index f0bfb8cff..8a964bc71 100644
--- a/tika-parsers/tika-parsers-extended/tika-parser-scientific-package/src/test/resources/2.4.1-no-tesseract.txt
+++ b/tika-parsers/tika-parsers-extended/tika-parser-scientific-package/src/test/resources/2.4.1-no-tesseract.txt
@@ -333,15 +333,13 @@ text/x-java-source class org.apache.tika.parser.code.SourceCodeParser
video/3gpp class org.apache.tika.parser.mp4.MP4Parser
video/3gpp2 class org.apache.tika.parser.mp4.MP4Parser
video/daala class org.gagravarr.tika.OggParser
-video/mp4 class org.apache.tika.parser.external.CompositeExternalParser
-video/mpeg class org.apache.tika.parser.external.CompositeExternalParser
+video/mp4 class org.apache.tika.parser.mp4.MP4Parser
video/ogg class org.gagravarr.tika.OggParser
video/quicktime class org.apache.tika.parser.mp4.MP4Parser
video/theora class org.gagravarr.tika.TheoraParser
video/x-dirac class org.gagravarr.tika.OggParser
video/x-flv class org.apache.tika.parser.video.FLVParser
video/x-m4v class org.apache.tika.parser.mp4.MP4Parser
-video/x-msvideo class org.apache.tika.parser.external.CompositeExternalParser
video/x-oggrgb class org.gagravarr.tika.OggParser
video/x-ogguvs class org.gagravarr.tika.OggParser
video/x-oggyuv class org.gagravarr.tika.OggParser
diff --git a/tika-parsers/tika-parsers-extended/tika-parser-scientific-package/src/test/resources/2.4.1-tesseract.txt b/tika-parsers/tika-parsers-extended/tika-parser-scientific-package/src/test/resources/2.4.1-tesseract.txt
index 439716dfe..69f696f85 100644
--- a/tika-parsers/tika-parsers-extended/tika-parser-scientific-package/src/test/resources/2.4.1-tesseract.txt
+++ b/tika-parsers/tika-parsers-extended/tika-parser-scientific-package/src/test/resources/2.4.1-tesseract.txt
@@ -343,15 +343,13 @@ text/x-java-source class org.apache.tika.parser.code.SourceCodeParser
video/3gpp class org.apache.tika.parser.mp4.MP4Parser
video/3gpp2 class org.apache.tika.parser.mp4.MP4Parser
video/daala class org.gagravarr.tika.OggParser
-video/mp4 class org.apache.tika.parser.external.CompositeExternalParser
-video/mpeg class org.apache.tika.parser.external.CompositeExternalParser
+video/mp4 class org.apache.tika.parser.mp4.MP4Parser
video/ogg class org.gagravarr.tika.OggParser
video/quicktime class org.apache.tika.parser.mp4.MP4Parser
video/theora class org.gagravarr.tika.TheoraParser
video/x-dirac class org.gagravarr.tika.OggParser
video/x-flv class org.apache.tika.parser.video.FLVParser
video/x-m4v class org.apache.tika.parser.mp4.MP4Parser
-video/x-msvideo class org.apache.tika.parser.external.CompositeExternalParser
video/x-oggrgb class org.gagravarr.tika.OggParser
video/x-ogguvs class org.gagravarr.tika.OggParser
video/x-oggyuv class org.gagravarr.tika.OggParser