You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2022/05/16 13:12:14 UTC

[tika] branch main updated: TIKA-3763 -- make sure to close the ForkParser in TikaCLI and elsewhere.

This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git


The following commit(s) were added to refs/heads/main by this push:
     new 3f9e7a3c5 TIKA-3763 -- make sure to close the ForkParser in TikaCLI and elsewhere.
3f9e7a3c5 is described below

commit 3f9e7a3c5c74890dc939cf2422e5057544f5c535
Author: tallison <ta...@apache.org>
AuthorDate: Mon May 16 09:11:57 2022 -0400

    TIKA-3763 -- make sure to close the ForkParser in TikaCLI and elsewhere.
---
 .../src/main/java/org/apache/tika/cli/TikaCLI.java | 23 ++++++++++------
 .../test/java/org/apache/tika/bundle/BundleIT.java | 32 ++++++++++++----------
 .../java/org/apache/tika/fork/ForkParserTest.java  |  4 +++
 .../parser/fork/ForkParserIntegrationTest.java     |  2 ++
 4 files changed, 37 insertions(+), 24 deletions(-)

diff --git a/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java b/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java
index 4da57a463..651ba7c11 100644
--- a/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java
+++ b/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java
@@ -1082,15 +1082,20 @@ public class TikaCLI {
                 p = new ForkParser(TikaCLI.class.getClassLoader(), p);
             }
             ContentHandler handler = getContentHandler(output, metadata);
-
-            p.parse(input, handler, metadata, context);
-            // fix for TIKA-596: if a parser doesn't generate
-            // XHTML output, the lack of an output document prevents
-            // metadata from being output: this fixes that
-            if (handler instanceof NoDocumentMetHandler) {
-                NoDocumentMetHandler metHandler = (NoDocumentMetHandler) handler;
-                if (!metHandler.metOutput()) {
-                    metHandler.endDocument();
+            try {
+                p.parse(input, handler, metadata, context);
+                // fix for TIKA-596: if a parser doesn't generate
+                // XHTML output, the lack of an output document prevents
+                // metadata from being output: this fixes that
+                if (handler instanceof NoDocumentMetHandler) {
+                    NoDocumentMetHandler metHandler = (NoDocumentMetHandler) handler;
+                    if (!metHandler.metOutput()) {
+                        metHandler.endDocument();
+                    }
+                }
+            } finally {
+                if (fork) {
+                    ((ForkParser) p).close();
                 }
             }
         }
diff --git a/tika-bundles/tika-bundle-standard/src/test/java/org/apache/tika/bundle/BundleIT.java b/tika-bundles/tika-bundle-standard/src/test/java/org/apache/tika/bundle/BundleIT.java
index 1387a0e2a..2d04ac1f9 100644
--- a/tika-bundles/tika-bundle-standard/src/test/java/org/apache/tika/bundle/BundleIT.java
+++ b/tika-bundles/tika-bundle-standard/src/test/java/org/apache/tika/bundle/BundleIT.java
@@ -159,21 +159,23 @@ public class BundleIT {
 
     @Test
     public void testForkParser() throws Exception {
-        ForkParser parser = new ForkParser(Activator.class.getClassLoader(), defaultParser);
-        String data = "<!DOCTYPE html>\n<html><body><p>test <span>content</span></p></body></html>";
-        InputStream stream = new ByteArrayInputStream(data.getBytes(UTF_8));
-        Writer writer = new StringWriter();
-        ContentHandler contentHandler = new BodyContentHandler(writer);
-        Metadata metadata = new Metadata();
-        MediaType type = contentTypeDetector.detect(stream, metadata);
-        assertEquals(type.toString(), "text/html");
-        metadata.add(Metadata.CONTENT_TYPE, type.toString());
-        ParseContext parseCtx = new ParseContext();
-        parser.parse(stream, contentHandler, metadata, parseCtx);
-        writer.flush();
-        String content = writer.toString();
-        assertTrue(content.length() > 0);
-        assertEquals("test content", content.trim());
+        try (ForkParser parser = new ForkParser(Activator.class.getClassLoader(), defaultParser)) {
+            String data =
+                    "<!DOCTYPE html>\n<html><body><p>test <span>content</span></p></body></html>";
+            InputStream stream = new ByteArrayInputStream(data.getBytes(UTF_8));
+            Writer writer = new StringWriter();
+            ContentHandler contentHandler = new BodyContentHandler(writer);
+            Metadata metadata = new Metadata();
+            MediaType type = contentTypeDetector.detect(stream, metadata);
+            assertEquals(type.toString(), "text/html");
+            metadata.add(Metadata.CONTENT_TYPE, type.toString());
+            ParseContext parseCtx = new ParseContext();
+            parser.parse(stream, contentHandler, metadata, parseCtx);
+            writer.flush();
+            String content = writer.toString();
+            assertTrue(content.length() > 0);
+            assertEquals("test content", content.trim());
+        }
     }
 
     @Test
diff --git a/tika-core/src/test/java/org/apache/tika/fork/ForkParserTest.java b/tika-core/src/test/java/org/apache/tika/fork/ForkParserTest.java
index a38641561..8db917a3e 100644
--- a/tika-core/src/test/java/org/apache/tika/fork/ForkParserTest.java
+++ b/tika-core/src/test/java/org/apache/tika/fork/ForkParserTest.java
@@ -204,6 +204,8 @@ public class ForkParserTest extends TikaTest {
             fail("should have thrown IOException");
         } catch (TikaException e) {
             //failed to communicate with forked parser process"
+        } finally {
+            forkParser.close();
         }
 
         //test setting very short pulse (10 ms) and a parser that takes at least 1000 ms
@@ -223,6 +225,8 @@ public class ForkParserTest extends TikaTest {
             fail("Should have thrown exception");
         } catch (IOException | TikaException e) {
             //"should have thrown IOException lost connection"
+        } finally {
+            forkParser.close();
         }
     }
 
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/parser/fork/ForkParserIntegrationTest.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/parser/fork/ForkParserIntegrationTest.java
index a750d2d6f..c2dfbdf39 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/parser/fork/ForkParserIntegrationTest.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/parser/fork/ForkParserIntegrationTest.java
@@ -228,6 +228,8 @@ public class ForkParserIntegrationTest extends MultiThreadedTikaTest {
             });
         } catch (Throwable t) {
             t.printStackTrace();
+        } finally {
+            parser.close();
         }
     }