You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by lf...@apache.org on 2023/08/10 21:17:24 UTC

[tika] branch TIKA-4111 created (now 26a9942ff)

This is an automated email from the ASF dual-hosted git repository.

lfcnassif pushed a change to branch TIKA-4111
in repository https://gitbox.apache.org/repos/asf/tika.git


      at 26a9942ff TIKA-4111 - update release notes

This branch includes the following new commits:

     new adfae59af TIKA-4111 - test file to reproduce issue
     new 43aceea43 TIKA-4111 - test to reproduce issue and additional test
     new bfc71d53f TIKA-4111 - return null if main entry is found & adjust depending calls
     new 2dc452ef3 TIKA-4111 - make streaming detection continue working
     new 26a9942ff TIKA-4111 - update release notes

The 5 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.



[tika] 03/05: TIKA-4111 - return null if main entry is found & adjust depending calls

Posted by lf...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

lfcnassif pushed a commit to branch TIKA-4111
in repository https://gitbox.apache.org/repos/asf/tika.git

commit bfc71d53f4f1c8f95fd89a4ec089d7b7ec192981
Author: Luis Nassif <lf...@gmail.com>
AuthorDate: Thu Aug 10 18:12:41 2023 -0300

    TIKA-4111 - return null if main entry is found & adjust depending calls
---
 .../apache/tika/parser/iwork/iwana/IWork13PackageParser.java  | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-apple-module/src/main/java/org/apache/tika/parser/iwork/iwana/IWork13PackageParser.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-apple-module/src/main/java/org/apache/tika/parser/iwork/iwana/IWork13PackageParser.java
index 2fac21e86..8fe7dc607 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-apple-module/src/main/java/org/apache/tika/parser/iwork/iwana/IWork13PackageParser.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-apple-module/src/main/java/org/apache/tika/parser/iwork/iwana/IWork13PackageParser.java
@@ -58,6 +58,7 @@ public class IWork13PackageParser extends AbstractParser {
      * All iWork 13 files contain this, so we can detect based on it
      */
     public final static String IWORK13_COMMON_ENTRY = "Metadata/BuildVersionHistory.plist";
+    public final static String IWORK13_MAIN_ENTRY = "Index/Document.iwa";
 
     public static final String IWORKS_PREFIX = "iworks:";
     public static final Property IWORKS_DOC_ID =
@@ -132,6 +133,9 @@ public class IWork13PackageParser extends AbstractParser {
                     embeddedDocumentExtractor);
             entry = zipStream.getNextEntry();
         }
+        if (type == null) {
+            type = IWork13DocumentType.UNKNOWN13.getType();
+        }
         return type;
     }
 
@@ -157,6 +161,9 @@ public class IWork13PackageParser extends AbstractParser {
                 ex = e;
             }
         }
+        if (type == null) {
+            type = IWork13DocumentType.UNKNOWN13.getType();
+        }
         if (ex != null) {
             throw new TikaException("problem processing zip file", ex);
         }
@@ -310,11 +317,11 @@ public class IWork13PackageParser extends AbstractParser {
             }
 
             // Is it the main document?
-            if (name.equals("Index/Document.iwa")) {
+            if (name.equals(IWORK13_MAIN_ENTRY)) {
                 // TODO Decode the snappy stream, and check for the Message Type
                 // =     2 (TN::SheetArchive), it is a numbers file;
                 // = 10000 (TP::DocumentArchive), that's a pages file
-                return UNKNOWN13.getType();
+                return null;
             }
 
             // Unknown


[tika] 04/05: TIKA-4111 - make streaming detection continue working

Posted by lf...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

lfcnassif pushed a commit to branch TIKA-4111
in repository https://gitbox.apache.org/repos/asf/tika.git

commit 2dc452ef30ad032c23a09cf231fad8bb42705392
Author: Luis Nassif <lf...@gmail.com>
AuthorDate: Thu Aug 10 18:13:21 2023 -0300

    TIKA-4111 - make streaming detection continue working
---
 .../src/main/java/org/apache/tika/detect/apple/IWorkDetector.java     | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-apple-module/src/main/java/org/apache/tika/detect/apple/IWorkDetector.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-apple-module/src/main/java/org/apache/tika/detect/apple/IWorkDetector.java
index 05603022b..29287da8b 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-apple-module/src/main/java/org/apache/tika/detect/apple/IWorkDetector.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-apple-module/src/main/java/org/apache/tika/detect/apple/IWorkDetector.java
@@ -113,6 +113,10 @@ public class IWorkDetector implements ZipContainerDetector {
         if (entryNames == null) {
             return null;
         }
+        // general iwork 13
+        if (entryNames.names.contains(IWork13PackageParser.IWORK13_MAIN_ENTRY)) {
+            return IWork13PackageParser.IWork13DocumentType.UNKNOWN13.getType();
+        }
         //general iworks
         if (entryNames.names.contains(IWorkPackageParser.IWORK_COMMON_ENTRY)) {
             return MediaType.application("vnd.apple.iwork");


[tika] 05/05: TIKA-4111 - update release notes

Posted by lf...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

lfcnassif pushed a commit to branch TIKA-4111
in repository https://gitbox.apache.org/repos/asf/tika.git

commit 26a9942ffa0c267d6d07d200e35cac92c7bc4f5f
Author: Luis Nassif <lf...@gmail.com>
AuthorDate: Thu Aug 10 18:15:54 2023 -0300

    TIKA-4111 - update release notes
---
 CHANGES.txt | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/CHANGES.txt b/CHANGES.txt
index 23a77a66d..12999a515 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -13,6 +13,8 @@ Release 2.8.1 - ???
    * Fixed write limit bug in RecursiveParserWrapper (TIKA-4055).
 
    * Add mime detection for many files with thanks to Gregory Lepore (TIKA-3992).
+   
+   * Fixed iWork 13 keynote detection on files with wrong extension (TIKA-4111).
 
 Release 2.8.0 - 5/11/2023
 


[tika] 02/05: TIKA-4111 - test to reproduce issue and additional test

Posted by lf...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

lfcnassif pushed a commit to branch TIKA-4111
in repository https://gitbox.apache.org/repos/asf/tika.git

commit 43aceea436f3942fd841e94929394d124d6b0059
Author: Luis Nassif <lf...@gmail.com>
AuthorDate: Thu Aug 10 18:09:50 2023 -0300

    TIKA-4111 - test to reproduce issue and additional test
---
 .../tika/detect/apple/IWorkDetectorTest.java       | 54 ++++++++++++++++++++++
 1 file changed, 54 insertions(+)

diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-apple-module/src/test/java/org/apache/tika/detect/apple/IWorkDetectorTest.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-apple-module/src/test/java/org/apache/tika/detect/apple/IWorkDetectorTest.java
new file mode 100644
index 000000000..42abe634c
--- /dev/null
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-apple-module/src/test/java/org/apache/tika/detect/apple/IWorkDetectorTest.java
@@ -0,0 +1,54 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.detect.apple;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+
+import org.apache.commons.compress.archivers.zip.ZipFile;
+import org.junit.jupiter.api.Test;
+
+import org.apache.tika.TikaTest;
+import org.apache.tika.io.TikaInputStream;
+import org.apache.tika.mime.MediaType;
+import org.apache.tika.parser.iwork.iwana.IWork13PackageParser.IWork13DocumentType;
+import org.apache.tika.parser.iwork.iwana.IWork18PackageParser.IWork18DocumentType;
+
+public class IWorkDetectorTest extends TikaTest {
+
+    @Test
+    public void testDetectKeynote13() throws Exception {
+        String testFile = "/test-documents/testKeynote2013.detect";
+        IWorkDetector detector = new IWorkDetector();
+        try (TikaInputStream tis = TikaInputStream.get(getResourceAsStream(testFile));
+                ZipFile zipFile = new ZipFile(tis.getFile())) {
+            MediaType result = detector.detect(zipFile, tis);
+            assertEquals(IWork13DocumentType.KEYNOTE13.getType(), result);
+        }
+    }
+
+    @Test
+    public void testDetectKeynote18() throws Exception {
+        String testFile = "/test-documents/testKeynote2018.key";
+        IWorkDetector detector = new IWorkDetector();
+        try (TikaInputStream tis = TikaInputStream.get(getResourceAsStream(testFile));
+                ZipFile zipFile = new ZipFile(tis.getFile())) {
+            MediaType result = detector.detect(zipFile, tis);
+            assertEquals(IWork18DocumentType.KEYNOTE18.getType(), result);
+        }
+    }
+
+}


[tika] 01/05: TIKA-4111 - test file to reproduce issue

Posted by lf...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

lfcnassif pushed a commit to branch TIKA-4111
in repository https://gitbox.apache.org/repos/asf/tika.git

commit adfae59af4c386372d8ab8248960899b293425c2
Author: Luis Nassif <lf...@gmail.com>
AuthorDate: Thu Aug 10 18:09:03 2023 -0300

    TIKA-4111 - test file to reproduce issue
---
 .../resources/test-documents/testKeynote2013.detect    | Bin 0 -> 338001 bytes
 1 file changed, 0 insertions(+), 0 deletions(-)

diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-apple-module/src/test/resources/test-documents/testKeynote2013.detect b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-apple-module/src/test/resources/test-documents/testKeynote2013.detect
new file mode 100644
index 000000000..dbf077c96
Binary files /dev/null and b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-apple-module/src/test/resources/test-documents/testKeynote2013.detect differ