You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2019/07/29 14:52:04 UTC
[tika] branch master updated: include opennlp lang model in
tika-eval during assembly; convert paths to OS-independent paths in unit
tests; add headless in tests
This is an automated email from the ASF dual-hosted git repository.
tallison pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/tika.git
The following commit(s) were added to refs/heads/master by this push:
new 862f51a include opennlp lang model in tika-eval during assembly; convert paths to OS-independent paths in unit tests; add headless in tests
862f51a is described below
commit 862f51af1d75f45ea25aecefa5aee024afc4358b
Author: TALLISON <ta...@apache.org>
AuthorDate: Mon Jul 29 10:51:42 2019 -0400
include opennlp lang model in tika-eval during assembly
convert paths to OS-independent paths in unit tests
add headless in tests
---
assembly.xml | 3 +-
.../tika/cli/TikaCLIBatchCommandLineTest.java | 5 +-
.../org/apache/tika/batch/fs/FSBatchTestBase.java | 2 +
.../java/org/apache/tika/eval/TikaEvalCLITest.java | 57 +++++++++++++++++-----
4 files changed, 51 insertions(+), 16 deletions(-)
diff --git a/assembly.xml b/assembly.xml
index 3cbf862..4095cac 100644
--- a/assembly.xml
+++ b/assembly.xml
@@ -26,7 +26,8 @@
<excludes>
<exclude>**/target/**</exclude>
<exclude>**/.*/**</exclude>
- <exclude>**/opennlp/*.bin</exclude>
+ <exclude>**/opennlp/ner-*.bin</exclude>
+ <exclude>**/opennlp/en-*.bin</exclude>
<exclude>**/recognition/*.bin</exclude>
<exclude>**/*.releaseBackup</exclude>
</excludes>
diff --git a/tika-app/src/test/java/org/apache/tika/cli/TikaCLIBatchCommandLineTest.java b/tika-app/src/test/java/org/apache/tika/cli/TikaCLIBatchCommandLineTest.java
index 2eb2677..a5f95ea 100644
--- a/tika-app/src/test/java/org/apache/tika/cli/TikaCLIBatchCommandLineTest.java
+++ b/tika-app/src/test/java/org/apache/tika/cli/TikaCLIBatchCommandLineTest.java
@@ -31,6 +31,7 @@ import java.util.Map;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.IOUtils;
+import org.apache.tika.utils.ProcessUtils;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
@@ -176,9 +177,7 @@ public class TikaCLIBatchCommandLineTest {
boolean ex = false;
try {
String path = testFile.toAbsolutePath().toString();
- if (path.contains(" ")) {
- path = "\"" + path + "\"";
- }
+ path = ProcessUtils.escapeCommandLine(path);
String[] params = {testInputPathForCommandLine, path};
String[] commandLine = BatchCommandLineBuilder.build(params);
diff --git a/tika-batch/src/test/java/org/apache/tika/batch/fs/FSBatchTestBase.java b/tika-batch/src/test/java/org/apache/tika/batch/fs/FSBatchTestBase.java
index 3ce7198..ec0c3d9 100644
--- a/tika-batch/src/test/java/org/apache/tika/batch/fs/FSBatchTestBase.java
+++ b/tika-batch/src/test/java/org/apache/tika/batch/fs/FSBatchTestBase.java
@@ -170,6 +170,7 @@ public abstract class FSBatchTestBase extends TikaTest {
private String[] commandLine(String testConfig, String loggerProps, String[] args) {
List<String> commandLine = new ArrayList<>();
commandLine.add("java");
+ commandLine.add("-Djava.awt.headless=true");
commandLine.add("-Dlog4j.configuration=file:"+
this.getClass().getResource(loggerProps).getFile());
commandLine.add("-Xmx128m");
@@ -200,6 +201,7 @@ public abstract class FSBatchTestBase extends TikaTest {
String[] args) throws Exception {
List<String> commandLine = new ArrayList<>();
commandLine.add("java");
+ commandLine.add("-Djava.awt.headless=true");
commandLine.add("-Xmx128m");
commandLine.add("-cp");
String cp = System.getProperty("java.class.path");
diff --git a/tika-eval/src/test/java/org/apache/tika/eval/TikaEvalCLITest.java b/tika-eval/src/test/java/org/apache/tika/eval/TikaEvalCLITest.java
index ea06f70..5314712 100644
--- a/tika-eval/src/test/java/org/apache/tika/eval/TikaEvalCLITest.java
+++ b/tika-eval/src/test/java/org/apache/tika/eval/TikaEvalCLITest.java
@@ -35,6 +35,7 @@ import java.util.Set;
import org.apache.commons.io.FileUtils;
import org.apache.tika.TikaTest;
+import org.apache.tika.utils.ProcessUtils;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Ignore;
@@ -123,9 +124,14 @@ public class TikaEvalCLITest extends TikaTest {
List<String> args = new ArrayList<>();
args.add("Compare");
args.add("-extractsA");
- args.add(extractsDir.resolve("extractsA").toAbsolutePath().toString());
+ args.add(
+ ProcessUtils.escapeCommandLine(
+ extractsDir.resolve("extractsA")
+ .toAbsolutePath().toString()));
args.add("-extractsB");
- args.add(extractsDir.resolve("extractsB").toAbsolutePath().toString());
+ args.add(ProcessUtils.escapeCommandLine(
+ extractsDir.resolve("extractsB")
+ .toAbsolutePath().toString()));
//add these just to confirm this info doesn't cause problems w cli
args.add("-maxTokens");
args.add("10000000");
@@ -135,7 +141,10 @@ public class TikaEvalCLITest extends TikaTest {
args.add("100000");
args.add("-db");
- args.add(compareDBDir.toAbsolutePath().toString()+"/"+dbName);
+ args.add(
+ ProcessUtils.escapeCommandLine(
+ compareDBDir
+ .toAbsolutePath().toString()+"/"+dbName));
execute(args, 60000);
@@ -145,7 +154,9 @@ public class TikaEvalCLITest extends TikaTest {
List<String> args = new ArrayList<>();
args.add("Profile");
args.add("-extracts");
- args.add(extractsDir.resolve("extractsA").toAbsolutePath().toString());
+ args.add(ProcessUtils.escapeCommandLine(
+ extractsDir.resolve("extractsA")
+ .toAbsolutePath().toString()));
//add these just to confirm this info doesn't cause problems w cli
args.add("-maxTokens");
args.add("10000000");
@@ -155,7 +166,10 @@ public class TikaEvalCLITest extends TikaTest {
args.add("100000");
args.add("-db");
- args.add(profileDBDir.toAbsolutePath().toString()+"/"+dbName);
+ args.add(
+ ProcessUtils.escapeCommandLine(
+ profileDBDir
+ .toAbsolutePath().toString()+"/"+dbName));
execute(args, 60000);
}
@@ -163,9 +177,14 @@ public class TikaEvalCLITest extends TikaTest {
List<String> args = new ArrayList<>();
args.add("Report");
args.add("-db");
- args.add(profileDBDir.toAbsolutePath().toString()+"/"+dbName);
+ args.add(
+ ProcessUtils.escapeCommandLine(
+ profileDBDir.toAbsolutePath()
+ .toString()+"/"+dbName));
args.add("-rd");
- args.add(profileReportsDir.toAbsolutePath().toString());
+ args.add(
+ ProcessUtils.escapeCommandLine(
+ profileReportsDir.toAbsolutePath().toString()));
execute(args, 60000);
}
@@ -173,9 +192,13 @@ public class TikaEvalCLITest extends TikaTest {
List<String> args = new ArrayList<>();
args.add("Report");
args.add("-db");
- args.add(compareDBDir.toAbsolutePath().toString()+"/"+dbName);
+ args.add(
+ ProcessUtils.escapeCommandLine(
+ compareDBDir.toAbsolutePath().toString()+"/"+dbName));
args.add("-rd");
- args.add(compareReportsDir.toAbsolutePath().toString());
+ args.add(
+ ProcessUtils.escapeCommandLine(
+ compareReportsDir.toAbsolutePath().toString()));
execute(args, 60000);
}
@@ -186,11 +209,20 @@ public class TikaEvalCLITest extends TikaTest {
List<String> args = new ArrayList<>();
args.add("Compare");
args.add("-extractsA");
- args.add(extractsDir.resolve("extractsA").toAbsolutePath().toString());
+ args.add(
+ ProcessUtils.escapeCommandLine(
+ extractsDir.resolve("extractsA")
+ .toAbsolutePath().toString()));
args.add("-extractsB");
- args.add(extractsDir.resolve("extractsB").toAbsolutePath().toString());
+ args.add(
+ ProcessUtils.escapeCommandLine(
+ extractsDir.resolve("extractsB")
+ .toAbsolutePath().toString()));
args.add("-db");
- args.add(compareDBDir.toAbsolutePath().toString()+"/"+dbName);
+ args.add(
+ ProcessUtils.escapeCommandLine(
+ compareDBDir.toAbsolutePath()
+ .toString()+"/"+dbName));
execute(args, 60000);
// args.add("-drop");
@@ -202,6 +234,7 @@ public class TikaEvalCLITest extends TikaTest {
List<String> args = new ArrayList<>();
String cp = System.getProperty("java.class.path");
args.add("java");
+ args.add("-Djava.awt.headless=true");
args.add("-cp");
args.add(cp);
args.add("org.apache.tika.eval.TikaEvalCLI");