You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by lf...@apache.org on 2023/08/10 21:17:27 UTC

[tika] 03/05: TIKA-4111 - return null if main entry is found & adjust dependent calls

This is an automated email from the ASF dual-hosted git repository.

lfcnassif pushed a commit to branch TIKA-4111
in repository https://gitbox.apache.org/repos/asf/tika.git

commit bfc71d53f4f1c8f95fd89a4ec089d7b7ec192981
Author: Luis Nassif <lf...@gmail.com>
AuthorDate: Thu Aug 10 18:12:41 2023 -0300

    TIKA-4111 - return null if main entry is found & adjust dependent calls
---
 .../apache/tika/parser/iwork/iwana/IWork13PackageParser.java  | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-apple-module/src/main/java/org/apache/tika/parser/iwork/iwana/IWork13PackageParser.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-apple-module/src/main/java/org/apache/tika/parser/iwork/iwana/IWork13PackageParser.java
index 2fac21e86..8fe7dc607 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-apple-module/src/main/java/org/apache/tika/parser/iwork/iwana/IWork13PackageParser.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-apple-module/src/main/java/org/apache/tika/parser/iwork/iwana/IWork13PackageParser.java
@@ -58,6 +58,7 @@ public class IWork13PackageParser extends AbstractParser {
      * All iWork 13 files contain this, so we can detect based on it
      */
     public final static String IWORK13_COMMON_ENTRY = "Metadata/BuildVersionHistory.plist";
+    public final static String IWORK13_MAIN_ENTRY = "Index/Document.iwa";
 
     public static final String IWORKS_PREFIX = "iworks:";
     public static final Property IWORKS_DOC_ID =
@@ -132,6 +133,9 @@ public class IWork13PackageParser extends AbstractParser {
                     embeddedDocumentExtractor);
             entry = zipStream.getNextEntry();
         }
+        if (type == null) {
+            type = IWork13DocumentType.UNKNOWN13.getType();
+        }
         return type;
     }
 
@@ -157,6 +161,9 @@ public class IWork13PackageParser extends AbstractParser {
                 ex = e;
             }
         }
+        if (type == null) {
+            type = IWork13DocumentType.UNKNOWN13.getType();
+        }
         if (ex != null) {
             throw new TikaException("problem processing zip file", ex);
         }
@@ -310,11 +317,11 @@ public class IWork13PackageParser extends AbstractParser {
             }
 
             // Is it the main document?
-            if (name.equals("Index/Document.iwa")) {
+            if (name.equals(IWORK13_MAIN_ENTRY)) {
                 // TODO Decode the snappy stream, and check for the Message Type
                 // =     2 (TN::SheetArchive), it is a numbers file;
                 // = 10000 (TP::DocumentArchive), that's a pages file
-                return UNKNOWN13.getType();
+                return null;
             }
 
             // Unknown