You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2021/07/28 20:54:31 UTC

[tika] branch main updated: TIKA-3505 -- general clean up and ensure that PipesResource turns maxemitbatchbytes to zero -- everything must be emitted through the forked process.

This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git


The following commit(s) were added to refs/heads/main by this push:
     new 73222dd  TIKA-3505 -- general clean up and ensure that PipesResource turns maxemitbatchbytes to zero -- everything must be emitted through the forked process.
73222dd is described below

commit 73222dd87e3c78ff1eca45c0c05cffe985b3b67d
Author: tallison <ta...@apache.org>
AuthorDate: Wed Jul 28 16:54:12 2021 -0400

    TIKA-3505 -- general clean up and ensure that PipesResource turns maxemitbatchbytes to zero -- everything must be emitted through the forked process.
---
 .../src/main/java/org/apache/tika/pipes/PipesClient.java   |  1 -
 .../src/main/java/org/apache/tika/pipes/PipesServer.java   |  1 -
 tika-server/tika-server-core/pom.xml                       | 12 ++++++++++++
 .../apache/tika/server/core/resource/PipesResource.java    |  8 ++++++++
 .../java/org/apache/tika/server/core/TikaPipesTest.java    | 14 +++++++++++---
 5 files changed, 31 insertions(+), 5 deletions(-)

diff --git a/tika-core/src/main/java/org/apache/tika/pipes/PipesClient.java b/tika-core/src/main/java/org/apache/tika/pipes/PipesClient.java
index 46248f0..525a423 100644
--- a/tika-core/src/main/java/org/apache/tika/pipes/PipesClient.java
+++ b/tika-core/src/main/java/org/apache/tika/pipes/PipesClient.java
@@ -203,7 +203,6 @@ public class PipesClient implements Closeable {
                 LOG.warn("fetch exception: {} in {} ms", t.getId(), millis);
                 return readMessage(PipesResult.STATUS.FETCH_EXCEPTION);
             case PARSE_SUCCESS:
-            case PARSE_EXCEPTION_EMIT:
                 LOG.info("parse success: {} in {} ms", t.getId(), millis);
                 return deserializeEmitData();
             case PARSE_EXCEPTION_NO_EMIT:
diff --git a/tika-core/src/main/java/org/apache/tika/pipes/PipesServer.java b/tika-core/src/main/java/org/apache/tika/pipes/PipesServer.java
index ff0d41d..7b14fe8 100644
--- a/tika-core/src/main/java/org/apache/tika/pipes/PipesServer.java
+++ b/tika-core/src/main/java/org/apache/tika/pipes/PipesServer.java
@@ -86,7 +86,6 @@ public class PipesServer implements Runnable {
         FETCH_EXCEPTION,
         PARSE_SUCCESS,
         PARSE_EXCEPTION_NO_EMIT,
-        PARSE_EXCEPTION_EMIT,
         EMIT_SUCCESS,
         EMIT_SUCCESS_PARSE_EXCEPTION,
         EMIT_EXCEPTION,
diff --git a/tika-server/tika-server-core/pom.xml b/tika-server/tika-server-core/pom.xml
index 69362e7..f2b08b6 100644
--- a/tika-server/tika-server-core/pom.xml
+++ b/tika-server/tika-server-core/pom.xml
@@ -172,6 +172,18 @@
       <version>${project.version}</version>
       <scope>test</scope>
     </dependency>
+    <dependency>
+      <groupId>org.apache.logging.log4j</groupId>
+      <artifactId>log4j-core</artifactId>
+      <version>${log4j2.version}</version>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.logging.log4j</groupId>
+      <artifactId>log4j-slf4j-impl</artifactId>
+      <version>${log4j2.version}</version>
+      <scope>test</scope>
+    </dependency>
 
   </dependencies>
 
diff --git a/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/PipesResource.java b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/PipesResource.java
index daf4da3..a2bf880 100644
--- a/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/PipesResource.java
+++ b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/PipesResource.java
@@ -53,6 +53,14 @@ public class PipesResource {
     private final PipesParser pipesParser;
     public PipesResource(java.nio.file.Path tikaConfig) throws TikaConfigException, IOException {
         PipesConfig pipesConfig = PipesConfig.load(tikaConfig);
+        //this has to be zero. everything must be emitted through the PipesServer
+        long maxEmit = pipesConfig.getMaxForEmitBatchBytes();
+        if (maxEmit != 0) {
+            pipesConfig.setMaxForEmitBatchBytes(0);
+            if (maxEmit != PipesConfig.DEFAULT_MAX_FOR_EMIT_BATCH) {
+                LOG.warn("resetting max for emit batch to 0");
+            }
+        }
         this.pipesParser = new PipesParser(pipesConfig);
     }
 
diff --git a/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/TikaPipesTest.java b/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/TikaPipesTest.java
index a6ce922..837c894 100644
--- a/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/TikaPipesTest.java
+++ b/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/TikaPipesTest.java
@@ -31,6 +31,7 @@ import java.io.StringWriter;
 import java.nio.charset.StandardCharsets;
 import java.nio.file.Files;
 import java.nio.file.Path;
+import java.nio.file.StandardCopyOption;
 import java.util.ArrayList;
 import java.util.List;
 import javax.ws.rs.core.Response;
@@ -73,6 +74,7 @@ public class TikaPipesTest extends CXFTestBase {
     private static Path TMP_DIR;
     private static Path TMP_OUTPUT_DIR;
     private static Path TMP_OUTPUT_FILE;
+    private static Path TIKA_PIPES_LOG4j2_PATH;
     private static Path TMP_NPE_OUTPUT_FILE;
     private static Path TIKA_CONFIG_PATH;
     private static String TIKA_CONFIG_XML;
@@ -100,7 +102,9 @@ public class TikaPipesTest extends CXFTestBase {
                     inputDir.resolve(mockFile));
         }
         TIKA_CONFIG_PATH = Files.createTempFile(TMP_DIR, "tika-pipes-", ".xml");
-
+        TIKA_PIPES_LOG4j2_PATH = Files.createTempFile(TMP_DIR, "log4j2-", ".xml");
+        Files.copy(TikaPipesTest.class.getResourceAsStream("/log4j2.xml"), TIKA_PIPES_LOG4j2_PATH,
+                StandardCopyOption.REPLACE_EXISTING);
         TIKA_CONFIG_XML =
                 "<?xml version=\"1.0\" encoding=\"UTF-8\"?>" + "<properties>" + "<fetchers>" +
                         "<fetcher class=\"org.apache.tika.pipes.fetcher.fs.FileSystemFetcher\">" +
@@ -114,8 +118,12 @@ public class TikaPipesTest extends CXFTestBase {
                         "</emitter>" +
                         "</emitters>" + "<pipes><params><tikaConfig>" +
                 ProcessUtils.escapeCommandLine(TIKA_CONFIG_PATH.toAbsolutePath().toString()) +
-                        "</tikaConfig><numClients>10</numClients><forkedJvmArgs><arg>-Xmx256m" +
-                        "</arg></forkedJvmArgs>" +
+                        "</tikaConfig><numClients>10</numClients>" +
+                        "<forkedJvmArgs>" +
+                        "<arg>-Xmx256m</arg>" +
+                        "<arg>-Dlog4j.configurationFile=file:" +
+                        ProcessUtils.escapeCommandLine(TIKA_PIPES_LOG4j2_PATH.toAbsolutePath().toString()) + "</arg>" +
+                        "</forkedJvmArgs>" +
                         "</params></pipes>" + "</properties>";
         Files.write(TIKA_CONFIG_PATH, TIKA_CONFIG_XML.getBytes(StandardCharsets.UTF_8));
     }