You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2021/03/23 21:43:24 UTC

[tika] 03/03: TIKA-3334 -- fix thread safety bug in handling embedded docs in open office parser

This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch branch_1x
in repository https://gitbox.apache.org/repos/asf/tika.git

commit da055766a892254199ff8253834b871548a9fbef
Author: tallison <ta...@apache.org>
AuthorDate: Tue Mar 23 17:43:00 2021 -0400

    TIKA-3334 -- fix thread safety bug in handling embedded docs in open office parser
---
 CHANGES.txt                                        |  2 ++
 .../apache/tika/parser/odf/OpenDocumentParser.java | 30 +++++++++--------
 .../org/apache/tika/parser/odf/ODFParserTest.java  | 39 ++++++++++++++++++++++
 3 files changed, 58 insertions(+), 13 deletions(-)

diff --git a/CHANGES.txt b/CHANGES.txt
index 37dcabe..c16f37b 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1,5 +1,7 @@
 Release 1.26 - ??/??/????
 
+   * Fix thread safety bug in OpenOffice parser (TIKA-3334).
+
    * The "writeLimit" header now pertains to the combined characters
      written per container document (and embedded documents) in the /rmeta
      endpoint in tika-server (TIKA-3325); it no longer functions only
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/odf/OpenDocumentParser.java b/tika-parsers/src/main/java/org/apache/tika/parser/odf/OpenDocumentParser.java
index babaac2..fafefd6 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/odf/OpenDocumentParser.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/odf/OpenDocumentParser.java
@@ -65,7 +65,7 @@ public class OpenDocumentParser extends AbstractParser {
     private static final long serialVersionUID = -6410276875438618287L;
 
     private static final Set<MediaType> SUPPORTED_TYPES =
-            Collections.unmodifiableSet(new HashSet<MediaType>(Arrays.asList(
+            Collections.unmodifiableSet(new HashSet<>(Arrays.asList(
                     MediaType.application("vnd.sun.xml.writer"),
                     MediaType.application("vnd.oasis.opendocument.text"),
                     MediaType.application("vnd.oasis.opendocument.graphics"),
@@ -103,8 +103,6 @@ public class OpenDocumentParser extends AbstractParser {
     private static final String META_NAME = "meta.xml";
     private static final String MANIFEST_NAME = "META-INF/manifest.xml";
 
-    private EmbeddedDocumentUtil embeddedDocumentUtil;
-
     private Parser meta = new OpenDocumentMetaParser();
 
     private Parser content = new OpenDocumentContentParser();
@@ -136,7 +134,7 @@ public class OpenDocumentParser extends AbstractParser {
             Metadata metadata, ParseContext context)
             throws IOException, SAXException, TikaException {
 
-        embeddedDocumentUtil = new EmbeddedDocumentUtil(context);
+        EmbeddedDocumentUtil embeddedDocumentUtil = new EmbeddedDocumentUtil(context);
 
         // Open the Zip stream
         // Use a File if we can, and an already open zip is even better
@@ -167,14 +165,14 @@ public class OpenDocumentParser extends AbstractParser {
         try {
             if (zipFile != null) {
                 try {
-                    handleZipFile(zipFile, metadata, context, handler);
+                    handleZipFile(zipFile, metadata, context, handler, embeddedDocumentUtil);
                 } finally {
                     //Do we want to close silently == catch an exception here?
                     zipFile.close();
                 }
             } else {
                 try {
-                    handleZipStream(zipStream, metadata, context, handler);
+                    handleZipStream(zipStream, metadata, context, handler, embeddedDocumentUtil);
                 } finally {
                     //Do we want to close silently == catch an exception here?
                     zipStream.close();
@@ -200,7 +198,9 @@ public class OpenDocumentParser extends AbstractParser {
 
     private void handleZipStream(ZipInputStream zipStream, Metadata metadata,
                                  ParseContext context,
-                                 EndDocumentShieldingContentHandler handler) throws IOException, TikaException, SAXException {
+                                 EndDocumentShieldingContentHandler handler,
+                                 EmbeddedDocumentUtil embeddedDocumentUtil) throws IOException,
+            TikaException, SAXException {
         ZipEntry entry = zipStream.getNextEntry();
 		if (entry == null) {
 			throw new IOException("No entries found in ZipInputStream");
@@ -208,7 +208,7 @@ public class OpenDocumentParser extends AbstractParser {
 		List<SAXException> saxExceptions = new ArrayList<>();
         do {
             try {
-                handleZipEntry(entry, zipStream, metadata, context, handler);
+                handleZipEntry(entry, zipStream, metadata, context, handler, embeddedDocumentUtil);
             } catch (SAXException e) {
                 if (e.getCause() instanceof EncryptedDocumentException) {
                     throw (EncryptedDocumentException)e.getCause();
@@ -225,12 +225,14 @@ public class OpenDocumentParser extends AbstractParser {
     }
 
     private void handleZipFile(ZipFile zipFile, Metadata metadata,
-                               ParseContext context, EndDocumentShieldingContentHandler handler)
+                               ParseContext context, EndDocumentShieldingContentHandler handler,
+                               EmbeddedDocumentUtil embeddedDocumentUtil)
             throws IOException, TikaException, SAXException {
 
         ZipEntry entry = zipFile.getEntry(MANIFEST_NAME);
         if (entry != null) {
-            handleZipEntry(entry, zipFile.getInputStream(entry), metadata, context, handler);
+            handleZipEntry(entry, zipFile.getInputStream(entry), metadata, context, handler,
+                    embeddedDocumentUtil);
         }
         // If we can, process the metadata first, then the
         //  rest of the file afterwards (TIKA-1353)
@@ -238,19 +240,21 @@ public class OpenDocumentParser extends AbstractParser {
         entry = zipFile.getEntry(META_NAME);
         if (entry != null) {
             handleZipEntry(entry, zipFile.getInputStream(entry), metadata, context,
-                    handler);
+                    handler, embeddedDocumentUtil);
         }
 
         Enumeration<? extends ZipEntry> entries = zipFile.entries();
         while (entries.hasMoreElements()) {
             entry = entries.nextElement();
             if (!META_NAME.equals(entry.getName())) {
-                handleZipEntry(entry, zipFile.getInputStream(entry), metadata, context, handler);
+                handleZipEntry(entry, zipFile.getInputStream(entry), metadata, context, handler,
+                        embeddedDocumentUtil);
             }
         }
     }
     private void handleZipEntry(ZipEntry entry, InputStream zip, Metadata metadata,
-                                ParseContext context, ContentHandler handler)
+                                ParseContext context, ContentHandler handler,
+                                EmbeddedDocumentUtil embeddedDocumentUtil)
             throws IOException, SAXException, TikaException {
         if (entry == null) {
             return;
diff --git a/tika-parsers/src/test/java/org/apache/tika/parser/odf/ODFParserTest.java b/tika-parsers/src/test/java/org/apache/tika/parser/odf/ODFParserTest.java
index 0affa14..6006548 100644
--- a/tika-parsers/src/test/java/org/apache/tika/parser/odf/ODFParserTest.java
+++ b/tika-parsers/src/test/java/org/apache/tika/parser/odf/ODFParserTest.java
@@ -24,6 +24,11 @@ import java.io.InputStream;
 import java.nio.file.Path;
 import java.nio.file.Paths;
 import java.util.List;
+import java.util.concurrent.Callable;
+import java.util.concurrent.ExecutorCompletionService;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.Future;
 import java.util.zip.ZipEntry;
 import java.util.zip.ZipInputStream;
 
@@ -652,4 +657,38 @@ public class ODFParserTest extends TikaTest {
         parseContext.set(Parser.class, new EmptyParser());
         return parseContext;
     }
+
+    @Test
+    public void testMultiThreaded() throws Exception {
+        int numThreads = 10;
+        ExecutorService executorService = Executors.newFixedThreadPool(numThreads);
+        ExecutorCompletionService<Integer> executorCompletionService =
+                new ExecutorCompletionService<>(executorService);
+
+        for (int i = 0; i < numThreads; i++) {
+            executorCompletionService.submit(new Callable<Integer>() {
+                @Override
+                public Integer call() throws Exception {
+                    for (int i = 0; i < 10; i++) {
+                        List<Metadata> metadataList = getRecursiveMetadata("testODTEmbedded.odt");
+                        assertEquals(3, metadataList.size());
+                        assertEquals("THUMBNAIL",
+                                metadataList.get(1).get(TikaCoreProperties.EMBEDDED_RESOURCE_TYPE));
+                    }
+                    return 1;
+                }
+            });
+        }
+
+        try {
+            int finished = 0;
+            while (finished < numThreads) {
+                Future<Integer> future = executorCompletionService.take();
+                future.get();
+                finished++;
+            }
+        } finally {
+            executorService.shutdownNow();
+        }
+    }
 }