You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by dm...@apache.org on 2020/09/21 19:34:05 UTC
[tika] branch branch_1x updated: TIKA-3189: Updated Adobe Framemaker MIF Parser to perform version check as only versions 8 and above are supported
This is an automated email from the ASF dual-hosted git repository.
dmeikle pushed a commit to branch branch_1x
in repository https://gitbox.apache.org/repos/asf/tika.git
The following commit(s) were added to refs/heads/branch_1x by this push:
new 29be2f1 TIKA-3189: Updated Adobe Framemaker MIF Parser to perform version check as only versions 8 and above are supported
new da32729 Merge branch 'branch_1x' of github.com:apache/tika into branch_1x
29be2f1 is described below
commit 29be2f187d1156e544436ef375bc4ac9d7f27bb1
Author: David Meikle <dm...@apache.org>
AuthorDate: Mon Sep 21 20:31:17 2020 +0100
TIKA-3189: Updated Adobe Framemaker MIF Parser to perform version check as only versions 8 and above are supported
---
.../main/resources/org/apache/tika/mime/tika-mimetypes.xml | 6 ------
.../main/java/org/apache/tika/parser/mif/MIFParser.java | 14 ++++++++++++++
.../java/org/apache/tika/parser/mif/MIFParserTest.java | 7 +++++++
3 files changed, 21 insertions(+), 6 deletions(-)
diff --git a/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml b/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
index f929928..b4981a5 100644
--- a/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
+++ b/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
@@ -480,12 +480,6 @@
<glob pattern="*.mscml"/>
</mime-type>
- <mime-type type="application/vnd.mif">
- <_comment>Adobe MIF File</_comment>
- <glob pattern="*.mif"/>
- <sub-class-of type="text/plain"/>
- </mime-type>
-
<mime-type type="application/mikey"/>
<mime-type type="application/moss-keys"/>
<mime-type type="application/moss-signature"/>
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/mif/MIFParser.java b/tika-parsers/src/main/java/org/apache/tika/parser/mif/MIFParser.java
index 2a44390..c917232 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/mif/MIFParser.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/mif/MIFParser.java
@@ -17,6 +17,7 @@
package org.apache.tika.parser.mif;
import org.apache.commons.io.input.CloseShieldInputStream;
+import org.apache.commons.lang3.StringUtils;
import org.apache.tika.detect.AutoDetectReader;
import org.apache.tika.detect.EncodingDetector;
import org.apache.tika.exception.TikaException;
@@ -37,15 +38,20 @@ import java.util.Collections;
import java.util.HashSet;
import java.util.Optional;
import java.util.Set;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
public class MIFParser extends AbstractEncodingDetectorParser {
+ private static final Pattern versionPattern = Pattern.compile("<MIFFile (\\d*)");
+
private static final Set<MediaType> SUPPORTED_TYPES =
Collections.unmodifiableSet(new HashSet<>(Arrays.asList(
MediaType.application("vnd.mif"),
MediaType.application("x-maker"),
MediaType.application("x-mif"))));
+
public MIFParser() {
super();
}
@@ -66,6 +72,14 @@ public class MIFParser extends AbstractEncodingDetectorParser {
try (AutoDetectReader reader = new AutoDetectReader(
new CloseShieldInputStream(stream), metadata, getEncodingDetector(context))) {
+ String version = reader.readLine();
+ version = StringUtils.substringBefore(version, ">");
+ Matcher versionCheck = versionPattern.matcher(version);
+ if (!versionCheck.matches() || Double.parseDouble(versionCheck.group(1)) < 8) {
+ throw new TikaException("Unsupported MIF File. Tika supports MIF version 8 and above.");
+ }
+ reader.reset();
+
Charset charset = reader.getCharset();
metadata.set(Metadata.CONTENT_ENCODING, charset.name());
Optional<MediaType> firstElement = SUPPORTED_TYPES.stream().findFirst();
diff --git a/tika-parsers/src/test/java/org/apache/tika/parser/mif/MIFParserTest.java b/tika-parsers/src/test/java/org/apache/tika/parser/mif/MIFParserTest.java
index aa04972..51d295c 100644
--- a/tika-parsers/src/test/java/org/apache/tika/parser/mif/MIFParserTest.java
+++ b/tika-parsers/src/test/java/org/apache/tika/parser/mif/MIFParserTest.java
@@ -18,6 +18,7 @@
package org.apache.tika.parser.mif;
import org.apache.tika.TikaTest;
+import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.parser.Parser;
import org.junit.Test;
@@ -52,4 +53,10 @@ public class MIFParserTest extends TikaTest {
assertContains("<meta name=\"Content-Type\" content=\"application/x-mif\" />", xml);
}
+ @Test(expected = TikaException.class)
+ public void testParserVersionCheck() throws Exception {
+ Metadata metadata = new Metadata();
+ getText("testMIF.mif", parser, metadata);
+ }
+
}
\ No newline at end of file