You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2021/10/07 19:15:03 UTC

[tika] 01/02: WIP do not commit

This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git

commit 3059bed865e29ce5705d082255ed71034d6ca422
Author: tallison <ta...@apache.org>
AuthorDate: Wed Sep 15 09:38:20 2021 -0400

    WIP do not commit
---
 .../tika/parser/external/ExternalParser.java       |   2 +
 .../org/apache/tika/utils/FileProcessResult.java   | 101 +++++++++++++
 .../java/org/apache/tika/utils/ProcessUtils.java   | 159 +++++++++++++++++++++
 .../java/org/apache/tika/utils/StreamGobbler.java  |  81 +++++++++++
 .../java/org/apache/tika/utils/StringUtils.java    |  18 +++
 5 files changed, 361 insertions(+)

diff --git a/tika-core/src/main/java/org/apache/tika/parser/external/ExternalParser.java b/tika-core/src/main/java/org/apache/tika/parser/external/ExternalParser.java
index 32a179a..9a50c6f 100644
--- a/tika-core/src/main/java/org/apache/tika/parser/external/ExternalParser.java
+++ b/tika-core/src/main/java/org/apache/tika/parser/external/ExternalParser.java
@@ -458,4 +458,6 @@ public class ExternalParser extends AbstractParser {
          */
         void consume(String line);
     }
+
+
 }
diff --git a/tika-core/src/main/java/org/apache/tika/utils/FileProcessResult.java b/tika-core/src/main/java/org/apache/tika/utils/FileProcessResult.java
new file mode 100644
index 0000000..f15c080
--- /dev/null
+++ b/tika-core/src/main/java/org/apache/tika/utils/FileProcessResult.java
@@ -0,0 +1,101 @@
+package org.apache.tika.utils;
+
+/** Outcome of one external process run: captured output text, stream lengths, exit value, timing. */
+public class FileProcessResult {
+    // Fields are not private: ProcessUtils (same package) assigns them directly.
+    String stderr = ""; // captured stderr text
+    String stdout = ""; // captured stdout text
+    int exitValue = -1; // stays -1 until an exit value is recorded
+    long processTimeMillis = -1; // stays -1 until a duration is recorded
+    boolean isTimeout = false;
+    long stdoutLength = -1; // stays -1 until a length is recorded
+    long stderrLength = -1; // stays -1 until a length is recorded
+    boolean stderrTruncated = false;
+    boolean stdoutTruncated = false;
+
+    public String getStderr() {
+        return this.stderr;
+    }
+
+    public void setStderr(String stderr) {
+        this.stderr = stderr;
+    }
+
+    public String getStdout() {
+        return this.stdout;
+    }
+
+    public void setStdout(String stdout) {
+        this.stdout = stdout;
+    }
+
+    public int getExitValue() {
+        return this.exitValue;
+    }
+
+    public void setExitValue(int exitValue) {
+        this.exitValue = exitValue;
+    }
+
+    public long getProcessTimeMillis() {
+        return this.processTimeMillis;
+    }
+
+    public void setProcessTimeMillis(long processTimeMillis) {
+        this.processTimeMillis = processTimeMillis;
+    }
+
+    public boolean isTimeout() {
+        return this.isTimeout;
+    }
+
+    public void setTimeout(boolean timeout) {
+        this.isTimeout = timeout;
+    }
+
+    public long getStdoutLength() {
+        return this.stdoutLength;
+    }
+
+    public void setStdoutLength(long stdoutLength) {
+        this.stdoutLength = stdoutLength;
+    }
+
+    public long getStderrLength() {
+        return this.stderrLength;
+    }
+
+    public void setStderrLength(long stderrLength) {
+        this.stderrLength = stderrLength;
+    }
+
+    public boolean isStderrTruncated() {
+        return this.stderrTruncated;
+    }
+
+    public void setStderrTruncated(boolean stderrTruncated) {
+        this.stderrTruncated = stderrTruncated;
+    }
+
+    public boolean isStdoutTruncated() {
+        return this.stdoutTruncated;
+    }
+
+    public void setStdoutTruncated(boolean stdoutTruncated) {
+        this.stdoutTruncated = stdoutTruncated;
+    }
+
+    @Override
+    public String toString() {
+        StringBuilder sb = new StringBuilder("FileProcessResult{");
+        sb.append("stderr='").append(stderr).append('\'');
+        sb.append(", stdout='").append(stdout).append('\'');
+        sb.append(", exitValue=").append(exitValue);
+        sb.append(", processTimeMillis=").append(processTimeMillis);
+        sb.append(", isTimeout=").append(isTimeout);
+        sb.append(", stdoutLength=").append(stdoutLength);
+        sb.append(", stderrLength=").append(stderrLength);
+        sb.append(", stderrTruncated=").append(stderrTruncated);
+        return sb.append(", stdoutTruncated=").append(stdoutTruncated).append('}').toString();
+    }
+}
\ No newline at end of file
diff --git a/tika-core/src/main/java/org/apache/tika/utils/ProcessUtils.java b/tika-core/src/main/java/org/apache/tika/utils/ProcessUtils.java
index daa92ac..fbe1799 100644
--- a/tika-core/src/main/java/org/apache/tika/utils/ProcessUtils.java
+++ b/tika-core/src/main/java/org/apache/tika/utils/ProcessUtils.java
@@ -17,8 +17,34 @@
 package org.apache.tika.utils;
 
 
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.UUID;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.TimeUnit;
+
 public class ProcessUtils {
 
+
+    // Live child processes keyed by a random id; a shutdown hook force-kills any still registered.
+    private static final ConcurrentHashMap<String, Process> PROCESS_MAP = new ConcurrentHashMap<>();
+
+    static {
+        Thread reaper = new Thread(() -> PROCESS_MAP.forEachValue(1, proc -> proc.destroyForcibly()));
+        Runtime.getRuntime().addShutdownHook(reaper);
+    }
+    /** Records {@code p} under a freshly generated UUID string and returns that key. */
+    private static String register(Process p) {
+        final String key = UUID.randomUUID().toString();
+        PROCESS_MAP.put(key, p);
+        return key;
+    }
+    /** Forgets the process stored under {@code id}; a null id is a no-op that returns null. */
+    private static Process release(String id) {
+        return id == null ? null : PROCESS_MAP.remove(id); // ConcurrentHashMap.remove(null) throws NPE
+    }
+
     /**
      * This should correctly put double-quotes around an argument if
      * ProcessBuilder doesn't seem to work (as it doesn't
@@ -47,4 +73,137 @@ public class ProcessUtils {
         }
         return arg;
     }
+
+    /**
+     * Runs the process, buffering its stdout and stderr into the returned FileProcessResult.
+     *
+     * @param pb              builder for the process to start
+     * @param timeoutMillis   how long to wait for exit before the process is forcibly destroyed
+     * @param maxStdoutBuffer maximum number of stdout characters to keep in the result
+     * @param maxStdErrBuffer maximum number of stderr characters to keep in the result
+     * @return captured output, stream lengths, timing and exit value (-1000 if the wait was interrupted)
+     * @throws IOException if the process cannot be started
+     */
+    public static FileProcessResult execute(ProcessBuilder pb,
+                                            long timeoutMillis,
+                                            int maxStdoutBuffer, int maxStdErrBuffer)
+            throws IOException {
+        Process p = null;
+        String id = null;
+        try {
+            p = pb.start();
+            id = register(p);
+            long elapsed = -1;
+            long start = System.currentTimeMillis();
+            StreamGobbler outGobbler = new StreamGobbler(p.getInputStream(), maxStdoutBuffer);
+            StreamGobbler errGobbler = new StreamGobbler(p.getErrorStream(), maxStdErrBuffer);
+            // Drain both pipes on their own threads so the process cannot stall on a full pipe.
+            Thread outThread = new Thread(outGobbler);
+            outThread.start();
+            Thread errThread = new Thread(errGobbler);
+            errThread.start();
+            int exitValue = -1;
+            boolean complete = false;
+            try {
+                complete = p.waitFor(timeoutMillis, TimeUnit.MILLISECONDS);
+                elapsed = System.currentTimeMillis() - start;
+                if (complete) {
+                    exitValue = p.exitValue();
+                } else {
+                    p.destroyForcibly();
+                }
+                // Either way, give the gobblers up to a second each to finish reading.
+                outThread.join(1000);
+                errThread.join(1000);
+            } catch (InterruptedException e) {
+                // Restore the interrupt flag for callers; -1000 marks the interruption in the result.
+                Thread.currentThread().interrupt();
+                exitValue = -1000;
+            }
+            FileProcessResult result = new FileProcessResult();
+            result.processTimeMillis = elapsed;
+            result.stderrLength = errGobbler.getStreamLength();
+            result.stdoutLength = outGobbler.getStreamLength();
+            result.isTimeout = ! complete;
+            result.exitValue = exitValue;
+            result.stdout = StringUtils.joinWith("\n", outGobbler.getLines());
+            result.stderr = StringUtils.joinWith("\n", errGobbler.getLines());
+            result.stdoutTruncated = outGobbler.getIsTruncated();
+            result.stderrTruncated = errGobbler.getIsTruncated();
+            return result;
+        } finally {
+            if (p != null) {
+                p.destroyForcibly();
+            }
+            if (id != null) {
+                release(id);
+            }
+        }
+    }
+
+    /**
+     * Runs the process with stdout redirected to the file stdoutRedirect; only stderr is buffered.
+     *
+     * @param pb              builder for the process to start; its stdout redirect is replaced
+     * @param timeoutMillis   how long to wait for exit before the process is forcibly destroyed
+     * @param stdoutRedirect  file that receives stdout; a missing parent directory is created
+     * @param maxStdErrBuffer maximum number of stderr characters to keep in the result
+     * @return result with empty stdout text, stdoutLength set to the redirect file's size, and stderr
+     * @throws IOException if the directory cannot be created, the process cannot start, or the size read fails
+     */
+    public static FileProcessResult execute(ProcessBuilder pb,
+                                            long timeoutMillis,
+                                            Path stdoutRedirect, int maxStdErrBuffer) throws IOException {
+        Path parent = stdoutRedirect.getParent(); // null when the path is a bare file name
+        if (parent != null && !Files.isDirectory(parent)) {
+            Files.createDirectories(parent);
+        }
+        pb.redirectOutput(stdoutRedirect.toFile());
+        Process p = null;
+        String id = null;
+        try {
+            p = pb.start();
+            id = register(p);
+            long elapsed = -1;
+            long start = System.currentTimeMillis();
+            StreamGobbler errGobbler = new StreamGobbler(p.getErrorStream(), maxStdErrBuffer);
+            Thread errThread = new Thread(errGobbler);
+            errThread.start();
+            int exitValue = -1;
+            boolean complete = false;
+            try {
+                complete = p.waitFor(timeoutMillis, TimeUnit.MILLISECONDS);
+                elapsed = System.currentTimeMillis() - start;
+                if (complete) {
+                    exitValue = p.exitValue();
+                } else {
+                    p.destroyForcibly();
+                }
+                errThread.join(1000);
+            } catch (InterruptedException e) {
+                // Restore the interrupt flag for callers; -1000 marks the interruption in the result.
+                Thread.currentThread().interrupt();
+                exitValue = -1000;
+            }
+            FileProcessResult result = new FileProcessResult();
+            result.processTimeMillis = elapsed;
+            result.stderrLength = errGobbler.getStreamLength();
+            result.stdoutLength = Files.size(stdoutRedirect);
+            result.isTimeout = !complete;
+            result.exitValue = exitValue;
+            result.stdout = "";
+            result.stderr = StringUtils.joinWith("\n", errGobbler.getLines());
+            result.stdoutTruncated = false;
+            result.stderrTruncated = errGobbler.getIsTruncated();
+            return result;
+        } finally {
+            if (p != null) {
+                p.destroyForcibly();
+            }
+            if (id != null) { // start() may throw before registration; releasing null would NPE
+                release(id);
+            }
+        }
+    }
+
 }
diff --git a/tika-core/src/main/java/org/apache/tika/utils/StreamGobbler.java b/tika-core/src/main/java/org/apache/tika/utils/StreamGobbler.java
new file mode 100644
index 0000000..7fac035
--- /dev/null
+++ b/tika-core/src/main/java/org/apache/tika/utils/StreamGobbler.java
@@ -0,0 +1,81 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.utils;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
+import java.util.List;
+
+/** Reads a stream to its end as UTF-8 lines, retaining at most {@code maxBufferLength} characters. */
+public class StreamGobbler implements Runnable {
+
+    private final InputStream is;
+    private final int maxBufferLength; // negative: the stream is counted but nothing is retained
+    // No synchronization here: read these only after run() has finished.
+    List<String> lines = new ArrayList<>();
+    long streamLength = 0; // characters seen so far, line terminators excluded
+    boolean isTruncated = false;
+
+    public StreamGobbler(InputStream is, int maxBufferLength) {
+        this.is = is;
+        this.maxBufferLength = maxBufferLength;
+    }
+
+    /** Consumes the whole stream; an IOException simply ends the read, keeping what was gathered. */
+    @Override
+    public void run() {
+        try (BufferedReader r = new BufferedReader(
+                new InputStreamReader(is, StandardCharsets.UTF_8))) {
+            String line;
+            while ((line = r.readLine()) != null) {
+                if (maxBufferLength >= 0) {
+                    // long arithmetic: an int cast of streamLength could wrap on very long streams
+                    long remaining = maxBufferLength - streamLength;
+                    if (line.length() <= remaining) {
+                        lines.add(line);
+                    } else {
+                        // Flag every drop, including when not even a partial line fits.
+                        isTruncated = true;
+                        if (remaining > 0) {
+                            lines.add(line.substring(0, (int) remaining));
+                        }
+                    }
+                }
+                streamLength += line.length();
+            }
+        } catch (IOException ignored) {
+            // Deliberately ignored: a failed read just ends the capture with what was gathered.
+        }
+    }
+
+    public List<String> getLines() {
+        return lines;
+    }
+
+    public long getStreamLength() {
+        return streamLength;
+    }
+
+    public boolean getIsTruncated() {
+        return isTruncated;
+    }
+}
\ No newline at end of file
diff --git a/tika-core/src/main/java/org/apache/tika/utils/StringUtils.java b/tika-core/src/main/java/org/apache/tika/utils/StringUtils.java
index 53fd47c..462cceb 100644
--- a/tika-core/src/main/java/org/apache/tika/utils/StringUtils.java
+++ b/tika-core/src/main/java/org/apache/tika/utils/StringUtils.java
@@ -16,6 +16,8 @@
  */
 package org.apache.tika.utils;
 
+import java.util.List;
+
 public class StringUtils {
 
     /**
@@ -203,4 +205,20 @@ public class StringUtils {
                 return buf.toString();
         }
     }
+
+
+    /** Concatenates {@code lines} in order with {@code delimiter} between neighbours. */
+    public static String joinWith(String delimiter, List<String> lines) {
+        if (lines.isEmpty()) {
+            return EMPTY;
+        }
+        StringBuilder joined = new StringBuilder();
+        String separator = "";
+        for (String entry : lines) {
+            // Nothing precedes the first entry; every later one is preceded by the delimiter.
+            joined.append(separator).append(entry);
+            separator = delimiter;
+        }
+        return joined.toString();
+    }
 }