You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2016/12/20 18:30:07 UTC
tika git commit: TIKA-2221 -- correctly catch and rethrow encrypted document exception as EncryptedDocumentException in WordExtractor via Matthew Caruana Galizia
Repository: tika
Updated Branches:
refs/heads/2.x ffb25af1b -> ee761ac00
TIKA-2221 -- correctly catch and rethrow encrypted document exception as EncryptedDocumentException in WordExtractor via Matthew Caruana Galizia
Project: http://git-wip-us.apache.org/repos/asf/tika/repo
Commit: http://git-wip-us.apache.org/repos/asf/tika/commit/ee761ac0
Tree: http://git-wip-us.apache.org/repos/asf/tika/tree/ee761ac0
Diff: http://git-wip-us.apache.org/repos/asf/tika/diff/ee761ac0
Branch: refs/heads/2.x
Commit: ee761ac00c1dcc80f6c4030fe81a8780c5ac9d7e
Parents: ffb25af
Author: tballison <ta...@mitre.org>
Authored: Tue Dec 20 13:30:00 2016 -0500
Committer: tballison <ta...@mitre.org>
Committed: Tue Dec 20 13:30:00 2016 -0500
----------------------------------------------------------------------
.../org/apache/tika/parser/microsoft/WordExtractor.java | 3 +++
.../org/apache/tika/parser/microsoft/WordParserTest.java | 11 +++++++++++
2 files changed, 14 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/tika/blob/ee761ac0/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/microsoft/WordExtractor.java
----------------------------------------------------------------------
diff --git a/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/microsoft/WordExtractor.java b/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/microsoft/WordExtractor.java
index 6fd8f8e..a3664c1 100644
--- a/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/microsoft/WordExtractor.java
+++ b/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/microsoft/WordExtractor.java
@@ -50,6 +50,7 @@ import org.apache.poi.poifs.filesystem.DirectoryEntry;
import org.apache.poi.poifs.filesystem.DirectoryNode;
import org.apache.poi.poifs.filesystem.Entry;
import org.apache.poi.poifs.filesystem.NPOIFSFileSystem;
+import org.apache.tika.exception.EncryptedDocumentException;
import org.apache.tika.exception.TikaException;
import org.apache.tika.io.TikaInputStream;
import org.apache.tika.metadata.Metadata;
@@ -151,6 +152,8 @@ public class WordExtractor extends AbstractPOIFSExtractor {
HWPFDocument document;
try {
document = new HWPFDocument(root);
+ } catch (org.apache.poi.EncryptedDocumentException e) {
+ throw new EncryptedDocumentException(e);
} catch (OldWordFileFormatException e) {
parseWord6(root, xhtml);
return;
http://git-wip-us.apache.org/repos/asf/tika/blob/ee761ac0/tika-parser-modules/tika-parser-office-module/src/test/java/org/apache/tika/parser/microsoft/WordParserTest.java
----------------------------------------------------------------------
diff --git a/tika-parser-modules/tika-parser-office-module/src/test/java/org/apache/tika/parser/microsoft/WordParserTest.java b/tika-parser-modules/tika-parser-office-module/src/test/java/org/apache/tika/parser/microsoft/WordParserTest.java
index 9660363..abb15c7 100644
--- a/tika-parser-modules/tika-parser-office-module/src/test/java/org/apache/tika/parser/microsoft/WordParserTest.java
+++ b/tika-parser-modules/tika-parser-office-module/src/test/java/org/apache/tika/parser/microsoft/WordParserTest.java
@@ -19,6 +19,7 @@ package org.apache.tika.parser.microsoft;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
import java.io.InputStream;
import java.util.Arrays;
@@ -554,5 +555,15 @@ public class WordParserTest extends TikaTest {
//moveFrom is deleted in .doc files
assertContainsCount("Second paragraph", r.xml, 2);
}
+
+ @Test
+ public void testProtected() throws Exception {
+ try {
+ getXML("testWORD_protected_passtika.doc");
+ fail("should have thrown encrypted document exception");
+ } catch (org.apache.tika.exception.EncryptedDocumentException e) {
+
+ }
+ }
}