You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2021/07/28 20:54:31 UTC
[tika] branch main updated: TIKA-3505 -- general clean up and ensure that PipesResource turns maxemitbatchbytes to zero -- everything must be emitted through the forked process.
This is an automated email from the ASF dual-hosted git repository.
tallison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git
The following commit(s) were added to refs/heads/main by this push:
new 73222dd TIKA-3505 -- general clean up and ensure that PipesResource turns maxemitbatchbytes to zero -- everything must be emitted through the forked process.
73222dd is described below
commit 73222dd87e3c78ff1eca45c0c05cffe985b3b67d
Author: tallison <ta...@apache.org>
AuthorDate: Wed Jul 28 16:54:12 2021 -0400
TIKA-3505 -- general clean up and ensure that PipesResource turns maxemitbatchbytes to zero -- everything must be emitted through the forked process.
---
.../src/main/java/org/apache/tika/pipes/PipesClient.java | 1 -
.../src/main/java/org/apache/tika/pipes/PipesServer.java | 1 -
tika-server/tika-server-core/pom.xml | 12 ++++++++++++
.../apache/tika/server/core/resource/PipesResource.java | 8 ++++++++
.../java/org/apache/tika/server/core/TikaPipesTest.java | 14 +++++++++++---
5 files changed, 31 insertions(+), 5 deletions(-)
diff --git a/tika-core/src/main/java/org/apache/tika/pipes/PipesClient.java b/tika-core/src/main/java/org/apache/tika/pipes/PipesClient.java
index 46248f0..525a423 100644
--- a/tika-core/src/main/java/org/apache/tika/pipes/PipesClient.java
+++ b/tika-core/src/main/java/org/apache/tika/pipes/PipesClient.java
@@ -203,7 +203,6 @@ public class PipesClient implements Closeable {
LOG.warn("fetch exception: {} in {} ms", t.getId(), millis);
return readMessage(PipesResult.STATUS.FETCH_EXCEPTION);
case PARSE_SUCCESS:
- case PARSE_EXCEPTION_EMIT:
LOG.info("parse success: {} in {} ms", t.getId(), millis);
return deserializeEmitData();
case PARSE_EXCEPTION_NO_EMIT:
diff --git a/tika-core/src/main/java/org/apache/tika/pipes/PipesServer.java b/tika-core/src/main/java/org/apache/tika/pipes/PipesServer.java
index ff0d41d..7b14fe8 100644
--- a/tika-core/src/main/java/org/apache/tika/pipes/PipesServer.java
+++ b/tika-core/src/main/java/org/apache/tika/pipes/PipesServer.java
@@ -86,7 +86,6 @@ public class PipesServer implements Runnable {
FETCH_EXCEPTION,
PARSE_SUCCESS,
PARSE_EXCEPTION_NO_EMIT,
- PARSE_EXCEPTION_EMIT,
EMIT_SUCCESS,
EMIT_SUCCESS_PARSE_EXCEPTION,
EMIT_EXCEPTION,
diff --git a/tika-server/tika-server-core/pom.xml b/tika-server/tika-server-core/pom.xml
index 69362e7..f2b08b6 100644
--- a/tika-server/tika-server-core/pom.xml
+++ b/tika-server/tika-server-core/pom.xml
@@ -172,6 +172,18 @@
<version>${project.version}</version>
<scope>test</scope>
</dependency>
+ <dependency>
+ <groupId>org.apache.logging.log4j</groupId>
+ <artifactId>log4j-core</artifactId>
+ <version>${log4j2.version}</version>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.logging.log4j</groupId>
+ <artifactId>log4j-slf4j-impl</artifactId>
+ <version>${log4j2.version}</version>
+ <scope>test</scope>
+ </dependency>
</dependencies>
diff --git a/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/PipesResource.java b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/PipesResource.java
index daf4da3..a2bf880 100644
--- a/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/PipesResource.java
+++ b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/PipesResource.java
@@ -53,6 +53,14 @@ public class PipesResource {
private final PipesParser pipesParser;
public PipesResource(java.nio.file.Path tikaConfig) throws TikaConfigException, IOException {
PipesConfig pipesConfig = PipesConfig.load(tikaConfig);
+ //this has to be zero. everything must be emitted through the PipesServer
+ long maxEmit = pipesConfig.getMaxForEmitBatchBytes();
+ if (maxEmit != 0) {
+ pipesConfig.setMaxForEmitBatchBytes(0);
+ if (maxEmit != PipesConfig.DEFAULT_MAX_FOR_EMIT_BATCH) {
+ LOG.warn("resetting max for emit batch to 0");
+ }
+ }
this.pipesParser = new PipesParser(pipesConfig);
}
diff --git a/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/TikaPipesTest.java b/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/TikaPipesTest.java
index a6ce922..837c894 100644
--- a/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/TikaPipesTest.java
+++ b/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/TikaPipesTest.java
@@ -31,6 +31,7 @@ import java.io.StringWriter;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
+import java.nio.file.StandardCopyOption;
import java.util.ArrayList;
import java.util.List;
import javax.ws.rs.core.Response;
@@ -73,6 +74,7 @@ public class TikaPipesTest extends CXFTestBase {
private static Path TMP_DIR;
private static Path TMP_OUTPUT_DIR;
private static Path TMP_OUTPUT_FILE;
+ private static Path TIKA_PIPES_LOG4j2_PATH;
private static Path TMP_NPE_OUTPUT_FILE;
private static Path TIKA_CONFIG_PATH;
private static String TIKA_CONFIG_XML;
@@ -100,7 +102,9 @@ public class TikaPipesTest extends CXFTestBase {
inputDir.resolve(mockFile));
}
TIKA_CONFIG_PATH = Files.createTempFile(TMP_DIR, "tika-pipes-", ".xml");
-
+ TIKA_PIPES_LOG4j2_PATH = Files.createTempFile(TMP_DIR, "log4j2-", ".xml");
+ Files.copy(TikaPipesTest.class.getResourceAsStream("/log4j2.xml"), TIKA_PIPES_LOG4j2_PATH,
+ StandardCopyOption.REPLACE_EXISTING);
TIKA_CONFIG_XML =
"<?xml version=\"1.0\" encoding=\"UTF-8\"?>" + "<properties>" + "<fetchers>" +
"<fetcher class=\"org.apache.tika.pipes.fetcher.fs.FileSystemFetcher\">" +
@@ -114,8 +118,12 @@ public class TikaPipesTest extends CXFTestBase {
"</emitter>" +
"</emitters>" + "<pipes><params><tikaConfig>" +
ProcessUtils.escapeCommandLine(TIKA_CONFIG_PATH.toAbsolutePath().toString()) +
- "</tikaConfig><numClients>10</numClients><forkedJvmArgs><arg>-Xmx256m" +
- "</arg></forkedJvmArgs>" +
+ "</tikaConfig><numClients>10</numClients>" +
+ "<forkedJvmArgs>" +
+ "<arg>-Xmx256m</arg>" +
+ "<arg>-Dlog4j.configurationFile=file:" +
+ ProcessUtils.escapeCommandLine(TIKA_PIPES_LOG4j2_PATH.toAbsolutePath().toString()) + "</arg>" +
+ "</forkedJvmArgs>" +
"</params></pipes>" + "</properties>";
Files.write(TIKA_CONFIG_PATH, TIKA_CONFIG_XML.getBytes(StandardCharsets.UTF_8));
}