You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2020/03/11 15:47:31 UTC

[tika] 04/04: TIKA-2714 -- add detection for rar4 and rar5 files; throw an UnsupportedFormatException for rar5 files

This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/tika.git

commit 7b83f7538c788af4cd1c9e997e64dc550e733dee
Author: tallison <ta...@apache.org>
AuthorDate: Wed Mar 11 11:38:46 2020 -0400

    TIKA-2714 -- add detection for rar4 and rar5 files; throw an UnsupportedFormatException for rar5 files
---
 .../resources/org/apache/tika/mime/tika-mimetypes.xml     | 15 ++++++++++++++-
 .../main/java/org/apache/tika/parser/pkg/RarParser.java   |  6 ++++++
 .../java/org/apache/tika/parser/pkg/RarParserTest.java    |  2 +-
 3 files changed, 21 insertions(+), 2 deletions(-)

diff --git a/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml b/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
index ea1f97b..551e55e 100644
--- a/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
+++ b/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
@@ -4177,7 +4177,20 @@
     </magic>
     <glob pattern="*.rar"/>
   </mime-type>
-
+  <mime-type type="application/x-rar-compressed;version=4">
+    <_comment>RAR archive</_comment>
+    <magic priority="60">
+      <match value="\x52\x61\x72\x21\x1a\x07\x00" type="string" offset="0"/>
+    </magic>
+    <sub-class-of type="application/x-rar-compressed"/>
+  </mime-type>
+  <mime-type type="application/x-rar-compressed;version=5">
+    <_comment>RAR archive</_comment>
+    <magic priority="60">
+      <match value="\x52\x61\x72\x21\x1a\x07\x01\x00" type="string" offset="0"/>
+    </magic>
+    <sub-class-of type="application/x-rar-compressed"/>
+  </mime-type>
   <mime-type type="application/x-roxio-toast">
     <glob pattern="*.toast"/>
     <sub-class-of type="application/x-iso9660-image"/>
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/pkg/RarParser.java b/tika-parsers/src/main/java/org/apache/tika/parser/pkg/RarParser.java
index 633b2cc..4cdcedd 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/pkg/RarParser.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/pkg/RarParser.java
@@ -27,11 +27,13 @@ import com.github.junrar.impl.FileVolumeManager;
 import com.github.junrar.rarfile.FileHeader;
 import org.apache.tika.exception.EncryptedDocumentException;
 import org.apache.tika.exception.TikaException;
+import org.apache.tika.exception.UnsupportedFormatException;
 import org.apache.tika.extractor.EmbeddedDocumentExtractor;
 import org.apache.tika.extractor.EmbeddedDocumentUtil;
 import org.apache.tika.io.TemporaryResources;
 import org.apache.tika.io.TikaInputStream;
 import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.TikaCoreProperties;
 import org.apache.tika.mime.MediaType;
 import org.apache.tika.parser.AbstractParser;
 import org.apache.tika.parser.ParseContext;
@@ -62,7 +64,11 @@ public class RarParser extends AbstractParser {
         xhtml.startDocument();
 
         EmbeddedDocumentExtractor extractor = EmbeddedDocumentUtil.getEmbeddedDocumentExtractor(context);
+        String mediaType = metadata.get(Metadata.CONTENT_TYPE);
 
+        if (mediaType != null && mediaType.contains("version=5")) {
+            throw new UnsupportedFormatException("Tika does not yet support rar version 5.");
+        }
         Archive rar = null;
         try (TemporaryResources tmp = new TemporaryResources()) {
             TikaInputStream tis = TikaInputStream.get(stream, tmp);
diff --git a/tika-parsers/src/test/java/org/apache/tika/parser/pkg/RarParserTest.java b/tika-parsers/src/test/java/org/apache/tika/parser/pkg/RarParserTest.java
index 34dcaab..d6f5af1 100644
--- a/tika-parsers/src/test/java/org/apache/tika/parser/pkg/RarParserTest.java
+++ b/tika-parsers/src/test/java/org/apache/tika/parser/pkg/RarParserTest.java
@@ -48,7 +48,7 @@ public class RarParserTest extends AbstractPkgTest {
             AUTO_DETECT_PARSER.parse(stream, handler, metadata, recursingContext);
         }
 
-        assertEquals("application/x-rar-compressed", metadata.get(Metadata.CONTENT_TYPE));
+        assertEquals("application/x-rar-compressed; version=4", metadata.get(Metadata.CONTENT_TYPE));
         String content = handler.toString();
         assertContains("test-documents/testEXCEL.xls", content);
         assertContains("Sample Excel Worksheet", content);