You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2016/12/20 18:30:07 UTC

tika git commit: TIKA-2221 -- correctly catch and rethrow encrypted document exception as EncryptedDocumentException in WordExtractor via Matthew Caruana Galizia

Repository: tika
Updated Branches:
  refs/heads/2.x ffb25af1b -> ee761ac00


 TIKA-2221 -- correctly catch and rethrow encrypted document exception as EncryptedDocumentException in WordExtractor via Matthew Caruana Galizia


Project: http://git-wip-us.apache.org/repos/asf/tika/repo
Commit: http://git-wip-us.apache.org/repos/asf/tika/commit/ee761ac0
Tree: http://git-wip-us.apache.org/repos/asf/tika/tree/ee761ac0
Diff: http://git-wip-us.apache.org/repos/asf/tika/diff/ee761ac0

Branch: refs/heads/2.x
Commit: ee761ac00c1dcc80f6c4030fe81a8780c5ac9d7e
Parents: ffb25af
Author: tballison <ta...@mitre.org>
Authored: Tue Dec 20 13:30:00 2016 -0500
Committer: tballison <ta...@mitre.org>
Committed: Tue Dec 20 13:30:00 2016 -0500

----------------------------------------------------------------------
 .../org/apache/tika/parser/microsoft/WordExtractor.java  |  3 +++
 .../org/apache/tika/parser/microsoft/WordParserTest.java | 11 +++++++++++
 2 files changed, 14 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tika/blob/ee761ac0/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/microsoft/WordExtractor.java
----------------------------------------------------------------------
diff --git a/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/microsoft/WordExtractor.java b/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/microsoft/WordExtractor.java
index 6fd8f8e..a3664c1 100644
--- a/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/microsoft/WordExtractor.java
+++ b/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/microsoft/WordExtractor.java
@@ -50,6 +50,7 @@ import org.apache.poi.poifs.filesystem.DirectoryEntry;
 import org.apache.poi.poifs.filesystem.DirectoryNode;
 import org.apache.poi.poifs.filesystem.Entry;
 import org.apache.poi.poifs.filesystem.NPOIFSFileSystem;
+import org.apache.tika.exception.EncryptedDocumentException;
 import org.apache.tika.exception.TikaException;
 import org.apache.tika.io.TikaInputStream;
 import org.apache.tika.metadata.Metadata;
@@ -151,6 +152,8 @@ public class WordExtractor extends AbstractPOIFSExtractor {
         HWPFDocument document;
         try {
             document = new HWPFDocument(root);
+        } catch (org.apache.poi.EncryptedDocumentException e) {
+            throw new EncryptedDocumentException(e);
         } catch (OldWordFileFormatException e) {
             parseWord6(root, xhtml);
             return;

http://git-wip-us.apache.org/repos/asf/tika/blob/ee761ac0/tika-parser-modules/tika-parser-office-module/src/test/java/org/apache/tika/parser/microsoft/WordParserTest.java
----------------------------------------------------------------------
diff --git a/tika-parser-modules/tika-parser-office-module/src/test/java/org/apache/tika/parser/microsoft/WordParserTest.java b/tika-parser-modules/tika-parser-office-module/src/test/java/org/apache/tika/parser/microsoft/WordParserTest.java
index 9660363..abb15c7 100644
--- a/tika-parser-modules/tika-parser-office-module/src/test/java/org/apache/tika/parser/microsoft/WordParserTest.java
+++ b/tika-parser-modules/tika-parser-office-module/src/test/java/org/apache/tika/parser/microsoft/WordParserTest.java
@@ -19,6 +19,7 @@ package org.apache.tika.parser.microsoft;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertFalse;
 import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
 
 import java.io.InputStream;
 import java.util.Arrays;
@@ -554,5 +555,15 @@ public class WordParserTest extends TikaTest {
         //moveFrom is deleted in .doc files
         assertContainsCount("Second paragraph", r.xml, 2);
     }
+
+    @Test
+    public void testProtected() throws Exception {
+        try {
+            getXML("testWORD_protected_passtika.doc");
+            fail("should have thrown encrypted document exception");
+        } catch (org.apache.tika.exception.EncryptedDocumentException e) {
+
+        }
+    }
 }