You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2022/05/16 13:12:14 UTC
[tika] branch main updated: TIKA-3763 -- make sure to close the ForkParser in TikaCLI and elsewhere.
This is an automated email from the ASF dual-hosted git repository.
tallison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git
The following commit(s) were added to refs/heads/main by this push:
new 3f9e7a3c5 TIKA-3763 -- make sure to close the ForkParser in TikaCLI and elsewhere.
3f9e7a3c5 is described below
commit 3f9e7a3c5c74890dc939cf2422e5057544f5c535
Author: tallison <ta...@apache.org>
AuthorDate: Mon May 16 09:11:57 2022 -0400
TIKA-3763 -- make sure to close the ForkParser in TikaCLI and elsewhere.
---
.../src/main/java/org/apache/tika/cli/TikaCLI.java | 23 ++++++++++------
.../test/java/org/apache/tika/bundle/BundleIT.java | 32 ++++++++++++----------
.../java/org/apache/tika/fork/ForkParserTest.java | 4 +++
.../parser/fork/ForkParserIntegrationTest.java | 2 ++
4 files changed, 37 insertions(+), 24 deletions(-)
diff --git a/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java b/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java
index 4da57a463..651ba7c11 100644
--- a/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java
+++ b/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java
@@ -1082,15 +1082,20 @@ public class TikaCLI {
p = new ForkParser(TikaCLI.class.getClassLoader(), p);
}
ContentHandler handler = getContentHandler(output, metadata);
-
- p.parse(input, handler, metadata, context);
- // fix for TIKA-596: if a parser doesn't generate
- // XHTML output, the lack of an output document prevents
- // metadata from being output: this fixes that
- if (handler instanceof NoDocumentMetHandler) {
- NoDocumentMetHandler metHandler = (NoDocumentMetHandler) handler;
- if (!metHandler.metOutput()) {
- metHandler.endDocument();
+ try {
+ p.parse(input, handler, metadata, context);
+ // fix for TIKA-596: if a parser doesn't generate
+ // XHTML output, the lack of an output document prevents
+ // metadata from being output: this fixes that
+ if (handler instanceof NoDocumentMetHandler) {
+ NoDocumentMetHandler metHandler = (NoDocumentMetHandler) handler;
+ if (!metHandler.metOutput()) {
+ metHandler.endDocument();
+ }
+ }
+ } finally {
+ if (fork) {
+ ((ForkParser) p).close();
}
}
}
diff --git a/tika-bundles/tika-bundle-standard/src/test/java/org/apache/tika/bundle/BundleIT.java b/tika-bundles/tika-bundle-standard/src/test/java/org/apache/tika/bundle/BundleIT.java
index 1387a0e2a..2d04ac1f9 100644
--- a/tika-bundles/tika-bundle-standard/src/test/java/org/apache/tika/bundle/BundleIT.java
+++ b/tika-bundles/tika-bundle-standard/src/test/java/org/apache/tika/bundle/BundleIT.java
@@ -159,21 +159,23 @@ public class BundleIT {
@Test
public void testForkParser() throws Exception {
- ForkParser parser = new ForkParser(Activator.class.getClassLoader(), defaultParser);
- String data = "<!DOCTYPE html>\n<html><body><p>test <span>content</span></p></body></html>";
- InputStream stream = new ByteArrayInputStream(data.getBytes(UTF_8));
- Writer writer = new StringWriter();
- ContentHandler contentHandler = new BodyContentHandler(writer);
- Metadata metadata = new Metadata();
- MediaType type = contentTypeDetector.detect(stream, metadata);
- assertEquals(type.toString(), "text/html");
- metadata.add(Metadata.CONTENT_TYPE, type.toString());
- ParseContext parseCtx = new ParseContext();
- parser.parse(stream, contentHandler, metadata, parseCtx);
- writer.flush();
- String content = writer.toString();
- assertTrue(content.length() > 0);
- assertEquals("test content", content.trim());
+ try (ForkParser parser = new ForkParser(Activator.class.getClassLoader(), defaultParser)) {
+ String data =
+ "<!DOCTYPE html>\n<html><body><p>test <span>content</span></p></body></html>";
+ InputStream stream = new ByteArrayInputStream(data.getBytes(UTF_8));
+ Writer writer = new StringWriter();
+ ContentHandler contentHandler = new BodyContentHandler(writer);
+ Metadata metadata = new Metadata();
+ MediaType type = contentTypeDetector.detect(stream, metadata);
+ assertEquals(type.toString(), "text/html");
+ metadata.add(Metadata.CONTENT_TYPE, type.toString());
+ ParseContext parseCtx = new ParseContext();
+ parser.parse(stream, contentHandler, metadata, parseCtx);
+ writer.flush();
+ String content = writer.toString();
+ assertTrue(content.length() > 0);
+ assertEquals("test content", content.trim());
+ }
}
@Test
diff --git a/tika-core/src/test/java/org/apache/tika/fork/ForkParserTest.java b/tika-core/src/test/java/org/apache/tika/fork/ForkParserTest.java
index a38641561..8db917a3e 100644
--- a/tika-core/src/test/java/org/apache/tika/fork/ForkParserTest.java
+++ b/tika-core/src/test/java/org/apache/tika/fork/ForkParserTest.java
@@ -204,6 +204,8 @@ public class ForkParserTest extends TikaTest {
fail("should have thrown IOException");
} catch (TikaException e) {
//failed to communicate with forked parser process"
+ } finally {
+ forkParser.close();
}
//test setting very short pulse (10 ms) and a parser that takes at least 1000 ms
@@ -223,6 +225,8 @@ public class ForkParserTest extends TikaTest {
fail("Should have thrown exception");
} catch (IOException | TikaException e) {
//"should have thrown IOException lost connection"
+ } finally {
+ forkParser.close();
}
}
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/parser/fork/ForkParserIntegrationTest.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/parser/fork/ForkParserIntegrationTest.java
index a750d2d6f..c2dfbdf39 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/parser/fork/ForkParserIntegrationTest.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/parser/fork/ForkParserIntegrationTest.java
@@ -228,6 +228,8 @@ public class ForkParserIntegrationTest extends MultiThreadedTikaTest {
});
} catch (Throwable t) {
t.printStackTrace();
+ } finally {
+ parser.close();
}
}