You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by dm...@apache.org on 2020/09/21 19:34:05 UTC

[tika] branch branch_1x updated: TIKA-3189: Updated Adobe FrameMaker MIF Parser to perform version check as only versions 8 and above are supported

This is an automated email from the ASF dual-hosted git repository.

dmeikle pushed a commit to branch branch_1x
in repository https://gitbox.apache.org/repos/asf/tika.git


The following commit(s) were added to refs/heads/branch_1x by this push:
     new 29be2f1  TIKA-3189: Updated Adobe FrameMaker MIF Parser to perform version check as only versions 8 and above are supported
     new da32729  Merge branch 'branch_1x' of github.com:apache/tika into branch_1x
29be2f1 is described below

commit 29be2f187d1156e544436ef375bc4ac9d7f27bb1
Author: David Meikle <dm...@apache.org>
AuthorDate: Mon Sep 21 20:31:17 2020 +0100

    TIKA-3189: Updated Adobe FrameMaker MIF Parser to perform version check as only versions 8 and above are supported
---
 .../main/resources/org/apache/tika/mime/tika-mimetypes.xml |  6 ------
 .../main/java/org/apache/tika/parser/mif/MIFParser.java    | 14 ++++++++++++++
 .../java/org/apache/tika/parser/mif/MIFParserTest.java     |  7 +++++++
 3 files changed, 21 insertions(+), 6 deletions(-)

diff --git a/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml b/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
index f929928..b4981a5 100644
--- a/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
+++ b/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
@@ -480,12 +480,6 @@
     <glob pattern="*.mscml"/>
   </mime-type>
 
-  <mime-type type="application/vnd.mif">
-    <_comment>Adobe MIF File</_comment>
-    <glob pattern="*.mif"/>
-    <sub-class-of type="text/plain"/>
-  </mime-type>
-
   <mime-type type="application/mikey"/>
   <mime-type type="application/moss-keys"/>
   <mime-type type="application/moss-signature"/>
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/mif/MIFParser.java b/tika-parsers/src/main/java/org/apache/tika/parser/mif/MIFParser.java
index 2a44390..c917232 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/mif/MIFParser.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/mif/MIFParser.java
@@ -17,6 +17,7 @@
 package org.apache.tika.parser.mif;
 
 import org.apache.commons.io.input.CloseShieldInputStream;
+import org.apache.commons.lang3.StringUtils;
 import org.apache.tika.detect.AutoDetectReader;
 import org.apache.tika.detect.EncodingDetector;
 import org.apache.tika.exception.TikaException;
@@ -37,15 +38,20 @@ import java.util.Collections;
 import java.util.HashSet;
 import java.util.Optional;
 import java.util.Set;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
 
 public class MIFParser extends AbstractEncodingDetectorParser {
 
+    private static final Pattern versionPattern = Pattern.compile("<MIFFile (\\d*)");
+
     private static final Set<MediaType> SUPPORTED_TYPES =
             Collections.unmodifiableSet(new HashSet<>(Arrays.asList(
                     MediaType.application("vnd.mif"),
                     MediaType.application("x-maker"),
                     MediaType.application("x-mif"))));
 
+
     public MIFParser() {
         super();
     }
@@ -66,6 +72,14 @@ public class MIFParser extends AbstractEncodingDetectorParser {
         try (AutoDetectReader reader = new AutoDetectReader(
                 new CloseShieldInputStream(stream), metadata, getEncodingDetector(context))) {
 
+            String version = reader.readLine();
+            version = StringUtils.substringBefore(version, ">");
+            Matcher versionCheck = versionPattern.matcher(version);
+            if (!versionCheck.matches() || Double.parseDouble(versionCheck.group(1)) < 8) {
+                throw new TikaException("Unsupported MIF File. Tika supports MIF version 8 and above.");
+            }
+            reader.reset();
+
             Charset charset = reader.getCharset();
             metadata.set(Metadata.CONTENT_ENCODING, charset.name());
             Optional<MediaType> firstElement = SUPPORTED_TYPES.stream().findFirst();
diff --git a/tika-parsers/src/test/java/org/apache/tika/parser/mif/MIFParserTest.java b/tika-parsers/src/test/java/org/apache/tika/parser/mif/MIFParserTest.java
index aa04972..51d295c 100644
--- a/tika-parsers/src/test/java/org/apache/tika/parser/mif/MIFParserTest.java
+++ b/tika-parsers/src/test/java/org/apache/tika/parser/mif/MIFParserTest.java
@@ -18,6 +18,7 @@
 package org.apache.tika.parser.mif;
 
 import org.apache.tika.TikaTest;
+import org.apache.tika.exception.TikaException;
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.parser.Parser;
 import org.junit.Test;
@@ -52,4 +53,10 @@ public class MIFParserTest extends TikaTest {
         assertContains("<meta name=\"Content-Type\" content=\"application/x-mif\" />", xml);
     }
 
+    @Test(expected = TikaException.class)
+    public void testParserVersionCheck() throws Exception {
+        Metadata metadata = new Metadata();
+        getText("testMIF.mif", parser, metadata);
+    }
+
 }
\ No newline at end of file