You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2022/11/04 22:07:27 UTC

[tika] branch main updated: TIKA-3873 -- add an encrypted metadata value when docs are encrypted.

This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git


The following commit(s) were added to refs/heads/main by this push:
     new 441cef8f3 TIKA-3873 -- add an encrypted metadata value when docs are encrypted.
441cef8f3 is described below

commit 441cef8f3946f4804bf7f472f07cfae36f7e29b5
Author: tballison <ta...@apache.org>
AuthorDate: Fri Nov 4 18:07:20 2022 -0400

    TIKA-3873 -- add an encrypted metadata value when docs are encrypted.
---
 .../src/main/java/org/apache/tika/metadata/TikaCoreProperties.java | 3 +++
 .../main/java/org/apache/tika/parser/RecursiveParserWrapper.java   | 7 +++++++
 .../src/test/java/org/apache/tika/parser/odf/ODFParserTest.java    | 3 +++
 3 files changed, 13 insertions(+)

diff --git a/tika-core/src/main/java/org/apache/tika/metadata/TikaCoreProperties.java b/tika-core/src/main/java/org/apache/tika/metadata/TikaCoreProperties.java
index 03a0e2cd2..8ba1834f4 100644
--- a/tika-core/src/main/java/org/apache/tika/metadata/TikaCoreProperties.java
+++ b/tika-core/src/main/java/org/apache/tika/metadata/TikaCoreProperties.java
@@ -291,6 +291,9 @@ public interface TikaCoreProperties {
     Property SIGNATURE_FILTER = Property.internalTextBag("signature:filter");
     Property SIGNATURE_CONTACT_INFO = Property.internalTextBag("signature:contact-info");
 
+    //is the file encrypted
+    Property IS_ENCRYPTED = Property.internalBoolean(TIKA_META_PREFIX + "encrypted");
+
     /**
      * A file might contain different types of embedded documents.
      * The most common is the ATTACHMENT.
diff --git a/tika-core/src/main/java/org/apache/tika/parser/RecursiveParserWrapper.java b/tika-core/src/main/java/org/apache/tika/parser/RecursiveParserWrapper.java
index e9e9457bb..79ff4c379 100644
--- a/tika-core/src/main/java/org/apache/tika/parser/RecursiveParserWrapper.java
+++ b/tika-core/src/main/java/org/apache/tika/parser/RecursiveParserWrapper.java
@@ -25,6 +25,7 @@ import org.xml.sax.ContentHandler;
 import org.xml.sax.SAXException;
 
 import org.apache.tika.exception.CorruptedFileException;
+import org.apache.tika.exception.EncryptedDocumentException;
 import org.apache.tika.exception.TikaException;
 import org.apache.tika.exception.WriteLimitReachedException;
 import org.apache.tika.exception.ZeroByteFileException;
@@ -162,6 +163,9 @@ public class RecursiveParserWrapper extends ParserDecorator {
             context.set(RecursivelySecureContentHandler.class, secureContentHandler);
             getWrappedParser().parse(tis, secureContentHandler, metadata, context);
         } catch (Throwable e) {
+            if (e instanceof EncryptedDocumentException) {
+                metadata.set(TikaCoreProperties.IS_ENCRYPTED, "true");
+            }
             if (WriteLimitReachedException.isWriteLimitReached(e)) {
                 metadata.set(TikaCoreProperties.WRITE_LIMIT_REACHED, "true");
             } else {
@@ -255,6 +259,9 @@ public class RecursiveParserWrapper extends ParserDecorator {
             } catch (CorruptedFileException e) {
                 throw e;
             } catch (TikaException e) {
+                if (e instanceof EncryptedDocumentException) {
+                    metadata.set(TikaCoreProperties.IS_ENCRYPTED, true);
+                }
                 if (context.get(ZeroByteFileException.IgnoreZeroByteFileException.class) != null &&
                         e instanceof ZeroByteFileException) {
                     //do nothing
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-miscoffice-module/src/test/java/org/apache/tika/parser/odf/ODFParserTest.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-miscoffice-module/src/test/java/org/apache/tika/parser/odf/ODFParserTest.java
index b9ee45bc2..7feab3a38 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-miscoffice-module/src/test/java/org/apache/tika/parser/odf/ODFParserTest.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-miscoffice-module/src/test/java/org/apache/tika/parser/odf/ODFParserTest.java
@@ -414,6 +414,9 @@ public class ODFParserTest extends TikaTest {
         assertThrows(EncryptedDocumentException.class, () -> {
             getRecursiveMetadata(p, false);
         });
+
+        List<Metadata> metadataList = getRecursiveMetadata(p, true);
+        assertEquals("true", metadataList.get(0).get(TikaCoreProperties.IS_ENCRYPTED));
     }
 
     //this, of course, should throw an EncryptedDocumentException