You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2020/03/11 15:47:31 UTC
[tika] 04/04: TIKA-2714 -- add detection for rar4 and rar5 files; throw an UnsupportedFormatException for rar5 files
This is an automated email from the ASF dual-hosted git repository.
tallison pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/tika.git
commit 7b83f7538c788af4cd1c9e997e64dc550e733dee
Author: tallison <ta...@apache.org>
AuthorDate: Wed Mar 11 11:38:46 2020 -0400
TIKA-2714 -- add detection for rar4 and rar5 files; throw an UnsupportedFormatException for rar5 files
---
.../resources/org/apache/tika/mime/tika-mimetypes.xml | 15 ++++++++++++++-
.../main/java/org/apache/tika/parser/pkg/RarParser.java | 6 ++++++
.../java/org/apache/tika/parser/pkg/RarParserTest.java | 2 +-
3 files changed, 21 insertions(+), 2 deletions(-)
diff --git a/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml b/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
index ea1f97b..551e55e 100644
--- a/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
+++ b/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
@@ -4177,7 +4177,20 @@
</magic>
<glob pattern="*.rar"/>
</mime-type>
-
+ <mime-type type="application/x-rar-compressed;version=4">
+ <_comment>RAR archive</_comment>
+ <magic priority="60">
+ <match value="\x52\x61\x72\x21\x1a\x07\x00" type="string" offset="0"/>
+ </magic>
+ <sub-class-of type="application/x-rar-compressed"/>
+ </mime-type>
+ <mime-type type="application/x-rar-compressed;version=5">
+ <_comment>RAR archive</_comment>
+ <magic priority="60">
+ <match value="\x52\x61\x72\x21\x1a\x07\x01\x00" type="string" offset="0"/>
+ </magic>
+ <sub-class-of type="application/x-rar-compressed"/>
+ </mime-type>
<mime-type type="application/x-roxio-toast">
<glob pattern="*.toast"/>
<sub-class-of type="application/x-iso9660-image"/>
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/pkg/RarParser.java b/tika-parsers/src/main/java/org/apache/tika/parser/pkg/RarParser.java
index 633b2cc..4cdcedd 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/pkg/RarParser.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/pkg/RarParser.java
@@ -27,11 +27,13 @@ import com.github.junrar.impl.FileVolumeManager;
import com.github.junrar.rarfile.FileHeader;
import org.apache.tika.exception.EncryptedDocumentException;
import org.apache.tika.exception.TikaException;
+import org.apache.tika.exception.UnsupportedFormatException;
import org.apache.tika.extractor.EmbeddedDocumentExtractor;
import org.apache.tika.extractor.EmbeddedDocumentUtil;
import org.apache.tika.io.TemporaryResources;
import org.apache.tika.io.TikaInputStream;
import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.AbstractParser;
import org.apache.tika.parser.ParseContext;
@@ -62,7 +64,11 @@ public class RarParser extends AbstractParser {
xhtml.startDocument();
EmbeddedDocumentExtractor extractor = EmbeddedDocumentUtil.getEmbeddedDocumentExtractor(context);
+ String mediaType = metadata.get(Metadata.CONTENT_TYPE);
+ if (mediaType != null && mediaType.contains("version=5")) {
+ throw new UnsupportedFormatException("Tika does not yet support rar version 5.");
+ }
Archive rar = null;
try (TemporaryResources tmp = new TemporaryResources()) {
TikaInputStream tis = TikaInputStream.get(stream, tmp);
diff --git a/tika-parsers/src/test/java/org/apache/tika/parser/pkg/RarParserTest.java b/tika-parsers/src/test/java/org/apache/tika/parser/pkg/RarParserTest.java
index 34dcaab..d6f5af1 100644
--- a/tika-parsers/src/test/java/org/apache/tika/parser/pkg/RarParserTest.java
+++ b/tika-parsers/src/test/java/org/apache/tika/parser/pkg/RarParserTest.java
@@ -48,7 +48,7 @@ public class RarParserTest extends AbstractPkgTest {
AUTO_DETECT_PARSER.parse(stream, handler, metadata, recursingContext);
}
- assertEquals("application/x-rar-compressed", metadata.get(Metadata.CONTENT_TYPE));
+ assertEquals("application/x-rar-compressed; version=4", metadata.get(Metadata.CONTENT_TYPE));
String content = handler.toString();
assertContains("test-documents/testEXCEL.xls", content);
assertContains("Sample Excel Worksheet", content);