You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2021/02/04 18:19:36 UTC

[tika] 01/02: simplify timeout on process call and the destroyforcibly call on the tesseract process

This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git

commit 2a84826e0dfd068a60f4de906e02b23f9e310d4d
Author: tballison <ta...@apache.org>
AuthorDate: Thu Feb 4 13:16:50 2021 -0500

    simplify timeout on process call and the destroyforcibly call on the tesseract process
---
 .../apache/tika/parser/ocr/TesseractOCRParser.java | 47 +++++++++++-----------
 1 file changed, 24 insertions(+), 23 deletions(-)

diff --git a/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-ocr-module/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java b/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-ocr-module/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java
index 5a8b0e9..54d9388 100644
--- a/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-ocr-module/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java
+++ b/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-ocr-module/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java
@@ -322,44 +322,46 @@ public class TesseractOCRParser extends AbstractParser {
         
         ProcessBuilder pb = new ProcessBuilder(cmd);
         setEnv(config, pb);
-        final Process process = pb.start();
 
+        Process process = null;
+        try {
+            process = pb.start();
+            runOCRProcess(process, config.getTimeout());
+        } finally {
+            if (process != null) {
+                process.destroyForcibly();
+            }
+        }
+    }
+
+    private void runOCRProcess(Process process, int timeout) throws IOException, TikaException {
         process.getOutputStream().close();
         InputStream out = process.getInputStream();
         InputStream err = process.getErrorStream();
         StringBuilder outBuilder = new StringBuilder();
         StringBuilder errBuilder = new StringBuilder();
-        logStream("OCR MSG", out, input, outBuilder);
-        logStream("OCR ERROR", err, input, errBuilder);
-
-        FutureTask<Integer> waitTask = new FutureTask<>(new Callable<Integer>() {
-            public Integer call() throws Exception {
-                return process.waitFor();
-            }
-        });
-
-        Thread waitThread = new Thread(waitTask);
-        waitThread.start();
+        logStream(out, outBuilder);
+        logStream(err, errBuilder);
 
         int exitValue = Integer.MIN_VALUE;
         try {
-            exitValue = waitTask.get(config.getTimeout(), TimeUnit.SECONDS);
+            boolean finished = process.waitFor(timeout, TimeUnit.SECONDS);
+            if (! finished) {
+                throw new TikaException("TesseractOCRParser timeout");
+            }
+            exitValue = process.exitValue();
         } catch (InterruptedException e) {
-            waitThread.interrupt();
-            process.destroy();
             Thread.currentThread().interrupt();
             throw new TikaException("TesseractOCRParser interrupted", e);
-        } catch (ExecutionException e) {
-            // should not be thrown
-        } catch (TimeoutException e) {
-            waitThread.interrupt();
-            process.destroy();
-            throw new TikaException("TesseractOCRParser timeout", e);
+        } catch (IllegalThreadStateException e) {
+            //this _should_ never be thrown
+            throw new TikaException("TesseractOCRParser timeout");
         }
         if (exitValue > 0) {
             throw new TikaException("TesseractOCRParser bad exit value " +
                     exitValue + " err msg: "+errBuilder.toString());
         }
+
     }
 
     /**
@@ -411,8 +413,7 @@ public class TesseractOCRParser extends AbstractParser {
      * stream of the given process to not block the process. The stream is closed
      * once fully processed.
      */
-    private void logStream(final String logType, final InputStream stream,
-                           final File file, final StringBuilder out) {
+    private void logStream(final InputStream stream, final StringBuilder out) {
         new Thread() {
             public void run() {
                 Reader reader = new InputStreamReader(stream, UTF_8);