You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2021/03/24 16:08:51 UTC
[tika] 01/04: TIKA-3334 -- fix thread safety in OpenDocumentParser
This is an automated email from the ASF dual-hosted git repository.
tallison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git
commit 667a310fcbfb67de670a26f658e8e7f4f477fdad
Author: tallison <ta...@apache.org>
AuthorDate: Wed Mar 24 11:46:21 2021 -0400
TIKA-3334 -- fix thread safety in OpenDocumentParser
---
.../apache/tika/parser/odf/OpenDocumentParser.java | 29 +++++++++--------
.../org/apache/tika/parser/odf/ODFParserTest.java | 36 ++++++++++++++++++++++
2 files changed, 53 insertions(+), 12 deletions(-)
diff --git a/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-miscoffice-module/src/main/java/org/apache/tika/parser/odf/OpenDocumentParser.java b/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-miscoffice-module/src/main/java/org/apache/tika/parser/odf/OpenDocumentParser.java
index 54f831c..946e6de 100644
--- a/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-miscoffice-module/src/main/java/org/apache/tika/parser/odf/OpenDocumentParser.java
+++ b/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-miscoffice-module/src/main/java/org/apache/tika/parser/odf/OpenDocumentParser.java
@@ -102,8 +102,6 @@ public class OpenDocumentParser extends AbstractParser {
private static final String META_NAME = "meta.xml";
private static final String MANIFEST_NAME = "META-INF/manifest.xml";
- private EmbeddedDocumentUtil embeddedDocumentUtil;
-
private Parser meta = new OpenDocumentMetaParser();
private Parser content = new OpenDocumentContentParser();
@@ -132,7 +130,7 @@ public class OpenDocumentParser extends AbstractParser {
public void parse(InputStream stream, ContentHandler baseHandler, Metadata metadata,
ParseContext context) throws IOException, SAXException, TikaException {
- embeddedDocumentUtil = new EmbeddedDocumentUtil(context);
+ EmbeddedDocumentUtil embeddedDocumentUtil = new EmbeddedDocumentUtil(context);
// Open the Zip stream
// Use a File if we can, and an already open zip is even better
@@ -162,14 +160,14 @@ public class OpenDocumentParser extends AbstractParser {
try {
if (zipFile != null) {
try {
- handleZipFile(zipFile, metadata, context, handler);
+ handleZipFile(zipFile, metadata, context, handler, embeddedDocumentUtil);
} finally {
//Do we want to close silently == catch an exception here?
zipFile.close();
}
} else {
try {
- handleZipStream(zipStream, metadata, context, handler);
+ handleZipStream(zipStream, metadata, context, handler, embeddedDocumentUtil);
} finally {
//Do we want to close silently == catch an exception here?
zipStream.close();
@@ -194,7 +192,8 @@ public class OpenDocumentParser extends AbstractParser {
}
private void handleZipStream(ZipInputStream zipStream, Metadata metadata, ParseContext context,
- EndDocumentShieldingContentHandler handler)
+ EndDocumentShieldingContentHandler handler,
+ EmbeddedDocumentUtil embeddedDocumentUtil)
throws IOException, TikaException, SAXException {
ZipEntry entry = zipStream.getNextEntry();
if (entry == null) {
@@ -203,7 +202,8 @@ public class OpenDocumentParser extends AbstractParser {
List<SAXException> exceptions = new ArrayList<>();
do {
try {
- handleZipEntry(entry, zipStream, metadata, context, handler);
+ handleZipEntry(entry, zipStream, metadata, context, handler,
+ embeddedDocumentUtil);
} catch (SAXException e) {
if (e.getCause() instanceof EncryptedDocumentException) {
throw (EncryptedDocumentException)e.getCause();
@@ -220,7 +220,8 @@ public class OpenDocumentParser extends AbstractParser {
}
private void handleZipFile(ZipFile zipFile, Metadata metadata, ParseContext context,
- EndDocumentShieldingContentHandler handler)
+ EndDocumentShieldingContentHandler handler,
+ EmbeddedDocumentUtil embeddedDocumentUtil)
throws IOException, TikaException, SAXException {
// If we can, process the metadata first, then the
// rest of the file afterwards (TIKA-1353)
@@ -228,25 +229,29 @@ public class OpenDocumentParser extends AbstractParser {
ZipEntry entry = zipFile.getEntry(MANIFEST_NAME);
if (entry != null) {
- handleZipEntry(entry, zipFile.getInputStream(entry), metadata, context, handler);
+ handleZipEntry(entry, zipFile.getInputStream(entry), metadata, context,
+ handler, embeddedDocumentUtil);
}
entry = zipFile.getEntry(META_NAME);
if (entry != null) {
- handleZipEntry(entry, zipFile.getInputStream(entry), metadata, context, handler);
+ handleZipEntry(entry, zipFile.getInputStream(entry), metadata, context,
+ handler, embeddedDocumentUtil);
}
Enumeration<? extends ZipEntry> entries = zipFile.entries();
while (entries.hasMoreElements()) {
entry = entries.nextElement();
if (!META_NAME.equals(entry.getName())) {
- handleZipEntry(entry, zipFile.getInputStream(entry), metadata, context, handler);
+ handleZipEntry(entry, zipFile.getInputStream(entry), metadata,
+ context, handler, embeddedDocumentUtil);
}
}
}
private void handleZipEntry(ZipEntry entry, InputStream zip, Metadata metadata,
- ParseContext context, ContentHandler handler)
+ ParseContext context, ContentHandler handler,
+ EmbeddedDocumentUtil embeddedDocumentUtil)
throws IOException, SAXException, TikaException {
diff --git a/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-miscoffice-module/src/test/java/org/apache/tika/parser/odf/ODFParserTest.java b/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-miscoffice-module/src/test/java/org/apache/tika/parser/odf/ODFParserTest.java
index b4eaf76..08a26b4 100644
--- a/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-miscoffice-module/src/test/java/org/apache/tika/parser/odf/ODFParserTest.java
+++ b/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-miscoffice-module/src/test/java/org/apache/tika/parser/odf/ODFParserTest.java
@@ -25,6 +25,11 @@ import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.Arrays;
import java.util.List;
+import java.util.concurrent.Callable;
+import java.util.concurrent.ExecutorCompletionService;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.Future;
import org.junit.Test;
import org.xml.sax.ContentHandler;
@@ -411,4 +416,35 @@ public class ODFParserTest extends TikaTest {
parseContext.set(Parser.class, new EmptyParser());
return parseContext;
}
+
+ @Test
+ public void testMultiThreaded() throws Exception {
+ int numThreads = 10;
+ ExecutorService executorService = Executors.newFixedThreadPool(numThreads);
+ ExecutorCompletionService<Integer> executorCompletionService =
+ new ExecutorCompletionService<>(executorService);
+
+ for (int i = 0; i < numThreads; i++) {
+ executorCompletionService.submit(() -> {
+ for (int i1 = 0; i1 < 10; i1++) {
+ List<Metadata> metadataList = getRecursiveMetadata("testODTEmbedded.odt");
+ assertEquals(3, metadataList.size());
+ assertEquals("THUMBNAIL",
+ metadataList.get(1).get(TikaCoreProperties.EMBEDDED_RESOURCE_TYPE));
+ }
+ return 1;
+ });
+ }
+
+ try {
+ int finished = 0;
+ while (finished < numThreads) {
+ Future<Integer> future = executorCompletionService.take();
+ future.get();
+ finished++;
+ }
+ } finally {
+ executorService.shutdownNow();
+ }
+ }
}