You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2018/11/14 15:42:30 UTC

[tika] 01/02: TIKA-2782 -- confirm child streams are redirected. Add workaround (shameless hack) if logger writes before streams are redirected.

This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/tika.git

commit 4ea421a7aef5d360ae2011c31d961c3dfbe6bfb9
Author: TALLISON <ta...@apache.org>
AuthorDate: Tue Nov 13 21:34:44 2018 -0500

    TIKA-2782 -- confirm child streams are redirected.  Add workaround (shameless hack) if logger writes before streams are redirected.
---
 .../org/apache/tika/server/ServerTimeouts.java     |  10 ++
 .../java/org/apache/tika/server/TikaServerCli.java |  24 +++--
 .../org/apache/tika/server/TikaServerWatchDog.java |  29 +++++-
 .../tika/server/TikaServerIntegrationTest.java     | 103 ++++++++++++++++++++-
 .../src/test/resources/logging/log4j_child.xml     |  29 ++++++
 .../src/test/resources/mock/testStdOutErr.xml      |  75 +++++++++++++++
 6 files changed, 260 insertions(+), 10 deletions(-)

diff --git a/tika-server/src/main/java/org/apache/tika/server/ServerTimeouts.java b/tika-server/src/main/java/org/apache/tika/server/ServerTimeouts.java
index b85d89c..34df411 100644
--- a/tika-server/src/main/java/org/apache/tika/server/ServerTimeouts.java
+++ b/tika-server/src/main/java/org/apache/tika/server/ServerTimeouts.java
@@ -52,6 +52,8 @@ public class ServerTimeouts {
      */
     public static final long DEFAULT_TASK_TIMEOUT_MILLIS = 120000;
 
+    private int maxRestarts = -1;
+
     private long taskTimeoutMillis = DEFAULT_TASK_TIMEOUT_MILLIS;
 
     private long pingTimeoutMillis = DEFAULT_PING_TIMEOUT_MILLIS;
@@ -103,4 +105,12 @@ public class ServerTimeouts {
     public void setPingPulseMillis(long pingPulseMillis) {
         this.pingPulseMillis = pingPulseMillis;
     }
+
+    public int getMaxRestarts() {
+        return maxRestarts;
+    }
+
+    public void setMaxRestarts(int maxRestarts) {
+        this.maxRestarts = maxRestarts;
+    }
 }
diff --git a/tika-server/src/main/java/org/apache/tika/server/TikaServerCli.java b/tika-server/src/main/java/org/apache/tika/server/TikaServerCli.java
index d63a515..0cdce50 100644
--- a/tika-server/src/main/java/org/apache/tika/server/TikaServerCli.java
+++ b/tika-server/src/main/java/org/apache/tika/server/TikaServerCli.java
@@ -18,6 +18,8 @@
 package org.apache.tika.server;
 
 import java.io.ByteArrayInputStream;
+import java.io.InputStream;
+import java.io.PrintStream;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.HashSet;
@@ -84,7 +86,7 @@ public class TikaServerCli {
 
     private static final List<String> ONLY_IN_SPAWN_CHILD_MODE =
             Arrays.asList(new String[] { "taskTimeoutMillis", "taskPulseMillis",
-            "pingTimeoutMillis", "pingPulseMillis", "maxFiles", "javaHome"});
+            "pingTimeoutMillis", "pingPulseMillis", "maxFiles", "javaHome", "maxRestarts"});
 
     private static Options getOptions() {
         Options options = new Options();
@@ -104,7 +106,7 @@ public class TikaServerCli {
         options.addOption("taskPulseMillis", true, "Only in spawn child mode: how often to check if a task has timed out.");
         options.addOption("pingTimeoutMillis", true, "Only in spawn child mode: how long to wait to wait for a ping and/or ping response.");
         options.addOption("pingPulseMillis", true, "Only in spawn child mode: how often to check if a ping has timed out.");
-
+        options.addOption("maxRestarts", true, "Only in spawn child mode: how many times to restart child process, default is -1 (always restart)");
         options.addOption("maxFiles", true, "Only in spawn child mode: shutdown server after this many files -- use only in 'spawnChild' mode");
         options.addOption("javaHome", true, "Override system property JAVA_HOME for calling java for the child process");
         options.addOption("child", false, "this process is a child process -- EXPERT -- " +
@@ -257,18 +259,24 @@ public class TikaServerCli {
             ServerStatus serverStatus = new ServerStatus();
             //if this is a child process
             if (line.hasOption("child")) {
+                //redirect!!!
+                PrintStream out = System.out;
+                InputStream in = System.in;
+                System.setIn(new ByteArrayInputStream(new byte[0]));
+                System.setOut(System.err);
+
                 long maxFiles = DEFAULT_MAX_FILES;
                 if (line.hasOption("maxFiles")) {
                     maxFiles = Long.parseLong(line.getOptionValue("maxFiles"));
                 }
 
                 ServerTimeouts serverTimeouts = configureServerTimeouts(line);
+
                 Thread serverThread =
-                new Thread(new ServerStatusWatcher(serverStatus, System.in,
-                        System.out, maxFiles, serverTimeouts));
+                new Thread(new ServerStatusWatcher(serverStatus, in,
+                        out, maxFiles, serverTimeouts));
+
                 serverThread.start();
-                System.setIn(new ByteArrayInputStream(new byte[0]));
-                System.setOut(System.err);
             }
             TikaResource.init(tika, digester, inputStreamFactory, serverStatus);
             JAXRSServerFactoryBean sf = new JAXRSServerFactoryBean();
@@ -347,6 +355,10 @@ public class TikaServerCli {
                     Long.parseLong(line.getOptionValue("pingPulseMillis")));
         }
 
+        if (line.hasOption("maxRestarts")) {
+            serverTimeouts.setMaxRestarts(Integer.parseInt(line.getOptionValue("maxRestarts")));
+        }
+
         return serverTimeouts;
     }
 
diff --git a/tika-server/src/main/java/org/apache/tika/server/TikaServerWatchDog.java b/tika-server/src/main/java/org/apache/tika/server/TikaServerWatchDog.java
index 964577c..5bf9bf3 100644
--- a/tika-server/src/main/java/org/apache/tika/server/TikaServerWatchDog.java
+++ b/tika-server/src/main/java/org/apache/tika/server/TikaServerWatchDog.java
@@ -21,6 +21,7 @@ import org.apache.tika.utils.ProcessUtils;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import java.io.ByteArrayInputStream;
 import java.io.DataInputStream;
 import java.io.DataOutputStream;
 import java.io.IOException;
@@ -49,9 +50,10 @@ public class TikaServerWatchDog {
     private volatile CHILD_STATUS childStatus = CHILD_STATUS.INITIALIZING;
     private volatile Instant lastPing = null;
     private ChildProcess childProcess = null;
-    int restarts = 0;
+
 
     public void execute(String[] args, ServerTimeouts serverTimeouts) throws Exception {
+        LOG.info("server watch dog is starting up");
         //if the child thread is in stop-the-world mode, and isn't
         //responding to the ping, this thread checks to make sure
         //that the parent ping is sent and received often enough
@@ -92,6 +94,7 @@ public class TikaServerWatchDog {
         try {
             childProcess = new ChildProcess(args);
             setChildStatus(CHILD_STATUS.RUNNING);
+            int restarts = 0;
             while (true) {
 
                 if (!childProcess.ping()) {
@@ -102,6 +105,11 @@ public class TikaServerWatchDog {
                     childProcess = new ChildProcess(args);
                     LOG.info("Successfully restarted child process -- {} restarts so far)", ++restarts);
                     setChildStatus(CHILD_STATUS.RUNNING);
+                    restarts++;
+                    if (serverTimeouts.getMaxRestarts() > -1 && restarts >= serverTimeouts.getMaxRestarts()) {
+                        LOG.warn("hit max restarts: "+restarts+". Stopping now");
+                        break;
+                    }
                 }
                 Thread.sleep(serverTimeouts.getPingPulseMillis());
             }
@@ -194,7 +202,22 @@ public class TikaServerWatchDog {
 
             this.fromChild = new DataInputStream(process.getInputStream());
             this.toChild = new DataOutputStream(process.getOutputStream());
-            byte status = fromChild.readByte();
+            //if logger's debug=true, there can be a bunch of stuff that
+            //was written to the process's inputstream _before_
+            //we did the redirect.
+            //These bytes need to be read from fromChild before the child has actually
+            //started...allow 64,000 bytes...completely arbitrary.
+            //this is admittedly hacky...If the logger writes 0, we'd
+            //interpret that as "OPERATING"...need to figure out
+            //better way to siphon statically written bytes before
+            //we do the redirect of streams.
+            int maxStartBytes = 64000;
+            int status = fromChild.readByte();
+            int read = 0;
+            while (status > -1 && read < maxStartBytes && status != ServerStatus.STATUS.OPERATING.getByte()) {
+                status = fromChild.readByte();
+                read++;
+            }
             if (status != ServerStatus.STATUS.OPERATING.getByte()) {
                 try {
                     ServerStatus.STATUS currStatus = ServerStatus.STATUS.lookup(status);
@@ -205,7 +228,7 @@ public class TikaServerWatchDog {
                 }
                 int len = process.getInputStream().available();
                 byte[] msg = new byte[len+1];
-                msg[0] = status;
+                msg[0] = (byte)status;
                 process.getInputStream().read(msg, 1, len);
 
                 throw new IOException(
diff --git a/tika-server/src/test/java/org/apache/tika/server/TikaServerIntegrationTest.java b/tika-server/src/test/java/org/apache/tika/server/TikaServerIntegrationTest.java
index 7003ab6..0d8a128 100644
--- a/tika-server/src/test/java/org/apache/tika/server/TikaServerIntegrationTest.java
+++ b/tika-server/src/test/java/org/apache/tika/server/TikaServerIntegrationTest.java
@@ -22,7 +22,9 @@ import org.apache.tika.metadata.Metadata;
 import org.apache.tika.metadata.OfficeOpenXMLExtended;
 import org.apache.tika.metadata.serialization.JsonMetadataList;
 import org.junit.After;
+import org.junit.AfterClass;
 import org.junit.Before;
+import org.junit.BeforeClass;
 import org.junit.Ignore;
 import org.junit.Test;
 
@@ -30,6 +32,9 @@ import javax.ws.rs.core.Response;
 import java.io.InputStream;
 import java.io.InputStreamReader;
 import java.io.Reader;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.StandardCopyOption;
 import java.security.Permission;
 import java.time.Duration;
 import java.time.Instant;
@@ -46,6 +51,7 @@ public class TikaServerIntegrationTest extends TikaTest {
     private static final String TEST_SYSTEM_EXIT = "mock/system_exit.xml";
     private static final String TEST_HEAVY_HANG = "mock/heavy_hang_30000.xml";
     private static final String TEST_HEAVY_HANG_SHORT = "mock/heavy_hang_100.xml";
+    private static final String TEST_STDOUT_STDERR = "mock/testStdOutErr.xml";
     private static final String META_PATH = "/rmeta";
 
     //running into conflicts on 9998 with the CXFTestBase tests
@@ -56,6 +62,7 @@ public class TikaServerIntegrationTest extends TikaTest {
             "http://localhost:" + INTEGRATION_TEST_PORT;
 
     private SecurityManager existingSecurityManager = null;
+    private static Path LOG_FILE;
 
     private static class MyExitException extends RuntimeException {
         private final int status;
@@ -67,6 +74,12 @@ public class TikaServerIntegrationTest extends TikaTest {
             return status;
         }
     }
+    @BeforeClass
+    public static void staticSetup() throws Exception {
+        LOG_FILE = Files.createTempFile("tika-server-integration", ".xml");
+        Files.copy(TikaServerIntegrationTest.class.getResourceAsStream("/logging/log4j_child.xml"), LOG_FILE, StandardCopyOption.REPLACE_EXISTING);
+    }
+
     @Before
     public void setUp() throws Exception {
         SecurityManager existingSecurityManager = System.getSecurityManager();
@@ -87,8 +100,13 @@ public class TikaServerIntegrationTest extends TikaTest {
         });
     }
 
+    @AfterClass
+    public static void staticTearDown() throws Exception {
+        Files.delete(LOG_FILE);
+    }
+
     @After
-    public void tearDown() {
+    public void tearDown() throws Exception {
         System.setSecurityManager(existingSecurityManager);
     }
 
@@ -332,6 +350,89 @@ public class TikaServerIntegrationTest extends TikaTest {
 
         assertEquals(-1, i.get());
     }
+
+    @Test
+    public void testStdErrOutBasic() throws Exception {
+        final AtomicInteger i = new AtomicInteger();
+        Thread serverThread = new Thread() {
+            @Override
+            public void run() {
+                TikaServerCli.main(
+                        new String[]{
+                                "-spawnChild",
+                                "-p", INTEGRATION_TEST_PORT,
+                                "-taskTimeoutMillis", "10000", "-taskPulseMillis", "500",
+                                "-pingPulseMillis", "100"
+                        });
+            }
+        };
+        serverThread.start();
+        awaitServerStartup();
+
+        Response response = WebClient
+                .create(endPoint + META_PATH)
+                .accept("application/json")
+                .put(ClassLoader
+                        .getSystemResourceAsStream(TEST_STDOUT_STDERR));
+        Reader reader = new InputStreamReader((InputStream) response.getEntity(), UTF_8);
+        List<Metadata> metadataList = JsonMetadataList.fromJson(reader);
+        assertEquals(1, metadataList.size());
+        assertContains("quick brown fox", metadataList.get(0).get("X-TIKA:content"));
+
+        response = WebClient
+                .create(endPoint + META_PATH)
+                .accept("application/json")
+                .put(ClassLoader
+                        .getSystemResourceAsStream(TEST_RECURSIVE_DOC));
+        reader = new InputStreamReader((InputStream) response.getEntity(), UTF_8);
+        metadataList = JsonMetadataList.fromJson(reader);
+        assertEquals(12, metadataList.size());
+        assertEquals("Microsoft Office Word", metadataList.get(0).get(OfficeOpenXMLExtended.APPLICATION));
+        assertContains("plundered our seas", metadataList.get(6).get("X-TIKA:content"));
+        serverThread.interrupt();
+    }
+
+    @Test
+    public void testStdErrOutLogging() throws Exception {
+        final AtomicInteger i = new AtomicInteger();
+        Thread serverThread = new Thread() {
+            @Override
+            public void run() {
+                TikaServerCli.main(
+                        new String[]{
+                                "-spawnChild",
+                                "-p", INTEGRATION_TEST_PORT,
+                                "-taskTimeoutMillis", "10000", "-taskPulseMillis", "500",
+                                "-pingPulseMillis", "100", "-maxRestarts", "0",
+                                "-JDlog4j.configuration=file:"+ LOG_FILE.toAbsolutePath()
+                        });
+            }
+        };
+        serverThread.start();
+        awaitServerStartup();
+
+        Response response = WebClient
+                .create(endPoint + META_PATH)
+                .accept("application/json")
+                .put(ClassLoader
+                        .getSystemResourceAsStream(TEST_STDOUT_STDERR));
+        Reader reader = new InputStreamReader((InputStream) response.getEntity(), UTF_8);
+        List<Metadata> metadataList = JsonMetadataList.fromJson(reader);
+        assertEquals(1, metadataList.size());
+        assertContains("quick brown fox", metadataList.get(0).get("X-TIKA:content"));
+
+        response = WebClient
+                .create(endPoint + META_PATH)
+                .accept("application/json")
+                .put(ClassLoader
+                        .getSystemResourceAsStream(TEST_RECURSIVE_DOC));
+        reader = new InputStreamReader((InputStream) response.getEntity(), UTF_8);
+        metadataList = JsonMetadataList.fromJson(reader);
+        assertEquals(12, metadataList.size());
+        assertEquals("Microsoft Office Word", metadataList.get(0).get(OfficeOpenXMLExtended.APPLICATION));
+        assertContains("plundered our seas", metadataList.get(6).get("X-TIKA:content"));
+        serverThread.interrupt();
+    }
     private void awaitServerStartup() throws Exception {
 
         Instant started = Instant.now();
diff --git a/tika-server/src/test/resources/logging/log4j_child.xml b/tika-server/src/test/resources/logging/log4j_child.xml
new file mode 100644
index 0000000..7391faf
--- /dev/null
+++ b/tika-server/src/test/resources/logging/log4j_child.xml
@@ -0,0 +1,29 @@
+<?xml version="1.0" encoding="UTF-8" ?>
+<!DOCTYPE log4j:configuration SYSTEM "log4j.dtd">
+<log4j:configuration debug="true">
+    <appender name="stdout" class="org.apache.log4j.ConsoleAppender">
+        <param name="Target" value="System.out"/>
+        <layout class="org.apache.log4j.PatternLayout">
+            <!-- Pattern to output the caller's file name and line number -->
+            <!--<param name="ConversionPattern" value="%5p [%t] (%F:%L) - %m%n"/>-->
+            <param name="ConversionPattern" value="%m%n"/>
+        </layout>
+    </appender>
+    <appender name="stderr" class="org.apache.log4j.ConsoleAppender">
+        <param name="Target" value="System.err"/>
+        <layout class="org.apache.log4j.PatternLayout">
+            <!-- Pattern to output the caller's file name and line number -->
+            <!--<param name="ConversionPattern" value="%5p [%t] (%F:%L) - %m%n"/>-->
+            <param name="ConversionPattern" value="%m%n"/>
+        </layout>
+    </appender>
+    <logger name="org.apache.tika" additivity="true">
+        <level value="debug"/>
+        <appender-ref ref="stdout"/>
+    </logger>
+    <logger name="org.apache.tika" additivity="true">
+        <level value="debug"/>
+        <appender-ref ref="stderr"/>
+    </logger>
+
+</log4j:configuration>
\ No newline at end of file
diff --git a/tika-server/src/test/resources/mock/testStdOutErr.xml b/tika-server/src/test/resources/mock/testStdOutErr.xml
new file mode 100644
index 0000000..899faef
--- /dev/null
+++ b/tika-server/src/test/resources/mock/testStdOutErr.xml
@@ -0,0 +1,75 @@
+<?xml version="1.0" encoding="UTF-8" ?>
+
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements.  See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership.  The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing,
+  software distributed under the License is distributed on an
+  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  KIND, either express or implied.  See the License for the
+  specific language governing permissions and limitations
+  under the License.
+-->
+
+<mock>
+
+
+    <!-- action can be "add" or "set" -->
+    <metadata action="add" name="dc:creator">Nikolai Lobachevsky</metadata>
+    <!-- element is the name of the sax event to write, p=paragraph
+        if the element is not specified, the default is <p> -->
+    <hang millis="100" heavy="false"  />
+    <write element="p">hello world! </write>
+    <!-- write something to System.out -->
+    <print_out>writing to System.out</print_out>
+    <hang millis="100" heavy="false"  />
+    <print_out>writing to System.out</print_out>
+    <hang millis="100" heavy="false"  />
+    <print_out>writing to System.out</print_out>
+    <hang millis="100" heavy="false"  />
+    <print_out>writing to System.out</print_out>
+    <hang millis="100" heavy="false"  />
+    <print_out>writing to System.out</print_out>
+    <hang millis="100" heavy="false"  />
+    <print_out>writing to System.out</print_out>
+    <hang millis="10" heavy="false"  />
+    <print_out>writing to System.out</print_out>
+    <hang millis="80" heavy="false"  />
+    <print_out>writing to System.out</print_out>
+    <hang millis="70" heavy="false"  />
+    <print_out>writing to System.out</print_out>
+    <hang millis="60" heavy="false"  />
+    <print_out>writing to System.out</print_out>
+    <hang millis="200" heavy="false"  />
+    <print_out>writing to System.out</print_out>
+    <hang millis="10" heavy="false"  />
+    <print_out>writing to System.out</print_out>
+    <hang millis="300" heavy="false"  />
+    <print_out>writing to System.out</print_out>
+    <hang millis="101" heavy="false"  />
+    <print_out>writing to System.out</print_out>
+    <hang millis="110" heavy="false"  />
+    <print_out>writing to System.out</print_out>
+    <hang millis="20" heavy="false"  />
+    <print_out>writing to System.out</print_out>
+    <hang millis="200" heavy="false"  />
+    <print_out>writing to System.out</print_out>
+    <hang millis="300" heavy="false"  />
+    <print_out>writing to System.out</print_out>
+    <hang millis="100" heavy="false"  />
+    <print_out>writing to System.out</print_out>
+    <hang millis="500" heavy="false"  />
+    <!-- write something to System.err -->
+    <print_err>writing to System.err</print_err>
+
+    <write element="p">Even after the calls to Sys.out/err...hello world! the quick brown fox jumped over the lazy dog</write>
+
+</mock>
\ No newline at end of file