You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2019/07/29 14:52:04 UTC

[tika] branch master updated: include opennlp lang model in tika-eval during assembly; convert paths to OS-independent paths in unit tests; add headless in tests

This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/tika.git


The following commit(s) were added to refs/heads/master by this push:
     new 862f51a  include opennlp lang model in tika-eval during assembly; convert paths to OS-independent paths in unit tests; add headless in tests
862f51a is described below

commit 862f51af1d75f45ea25aecefa5aee024afc4358b
Author: TALLISON <ta...@apache.org>
AuthorDate: Mon Jul 29 10:51:42 2019 -0400

    include opennlp lang model in tika-eval during assembly
    convert paths to OS-independent paths in unit tests
    add headless in tests
---
 assembly.xml                                       |  3 +-
 .../tika/cli/TikaCLIBatchCommandLineTest.java      |  5 +-
 .../org/apache/tika/batch/fs/FSBatchTestBase.java  |  2 +
 .../java/org/apache/tika/eval/TikaEvalCLITest.java | 57 +++++++++++++++++-----
 4 files changed, 51 insertions(+), 16 deletions(-)

diff --git a/assembly.xml b/assembly.xml
index 3cbf862..4095cac 100644
--- a/assembly.xml
+++ b/assembly.xml
@@ -26,7 +26,8 @@
       <excludes>
         <exclude>**/target/**</exclude>
         <exclude>**/.*/**</exclude>
-        <exclude>**/opennlp/*.bin</exclude>
+        <exclude>**/opennlp/ner-*.bin</exclude>
+        <exclude>**/opennlp/en-*.bin</exclude>
         <exclude>**/recognition/*.bin</exclude>
         <exclude>**/*.releaseBackup</exclude>
       </excludes>
diff --git a/tika-app/src/test/java/org/apache/tika/cli/TikaCLIBatchCommandLineTest.java b/tika-app/src/test/java/org/apache/tika/cli/TikaCLIBatchCommandLineTest.java
index 2eb2677..a5f95ea 100644
--- a/tika-app/src/test/java/org/apache/tika/cli/TikaCLIBatchCommandLineTest.java
+++ b/tika-app/src/test/java/org/apache/tika/cli/TikaCLIBatchCommandLineTest.java
@@ -31,6 +31,7 @@ import java.util.Map;
 
 import org.apache.commons.io.FileUtils;
 import org.apache.commons.io.IOUtils;
+import org.apache.tika.utils.ProcessUtils;
 import org.junit.After;
 import org.junit.Before;
 import org.junit.Test;
@@ -176,9 +177,7 @@ public class TikaCLIBatchCommandLineTest {
         boolean ex = false;
         try {
             String path = testFile.toAbsolutePath().toString();
-            if (path.contains(" ")) {
-                path = "\"" + path + "\"";
-            }
+            path = ProcessUtils.escapeCommandLine(path);
             String[] params = {testInputPathForCommandLine, path};
 
             String[] commandLine = BatchCommandLineBuilder.build(params);
diff --git a/tika-batch/src/test/java/org/apache/tika/batch/fs/FSBatchTestBase.java b/tika-batch/src/test/java/org/apache/tika/batch/fs/FSBatchTestBase.java
index 3ce7198..ec0c3d9 100644
--- a/tika-batch/src/test/java/org/apache/tika/batch/fs/FSBatchTestBase.java
+++ b/tika-batch/src/test/java/org/apache/tika/batch/fs/FSBatchTestBase.java
@@ -170,6 +170,7 @@ public abstract class FSBatchTestBase extends TikaTest {
     private String[] commandLine(String testConfig, String loggerProps, String[] args) {
         List<String> commandLine = new ArrayList<>();
         commandLine.add("java");
+        commandLine.add("-Djava.awt.headless=true");
         commandLine.add("-Dlog4j.configuration=file:"+
             this.getClass().getResource(loggerProps).getFile());
         commandLine.add("-Xmx128m");
@@ -200,6 +201,7 @@ public abstract class FSBatchTestBase extends TikaTest {
                                               String[] args) throws Exception {
         List<String> commandLine = new ArrayList<>();
         commandLine.add("java");
+        commandLine.add("-Djava.awt.headless=true");
         commandLine.add("-Xmx128m");
         commandLine.add("-cp");
         String cp = System.getProperty("java.class.path");
diff --git a/tika-eval/src/test/java/org/apache/tika/eval/TikaEvalCLITest.java b/tika-eval/src/test/java/org/apache/tika/eval/TikaEvalCLITest.java
index ea06f70..5314712 100644
--- a/tika-eval/src/test/java/org/apache/tika/eval/TikaEvalCLITest.java
+++ b/tika-eval/src/test/java/org/apache/tika/eval/TikaEvalCLITest.java
@@ -35,6 +35,7 @@ import java.util.Set;
 
 import org.apache.commons.io.FileUtils;
 import org.apache.tika.TikaTest;
+import org.apache.tika.utils.ProcessUtils;
 import org.junit.AfterClass;
 import org.junit.BeforeClass;
 import org.junit.Ignore;
@@ -123,9 +124,14 @@ public class TikaEvalCLITest extends TikaTest {
         List<String> args = new ArrayList<>();
         args.add("Compare");
         args.add("-extractsA");
-        args.add(extractsDir.resolve("extractsA").toAbsolutePath().toString());
+        args.add(
+                ProcessUtils.escapeCommandLine(
+                        extractsDir.resolve("extractsA")
+                                .toAbsolutePath().toString()));
         args.add("-extractsB");
-        args.add(extractsDir.resolve("extractsB").toAbsolutePath().toString());
+        args.add(ProcessUtils.escapeCommandLine(
+                extractsDir.resolve("extractsB")
+                        .toAbsolutePath().toString()));
         //add these just to confirm this info doesn't cause problems w cli
         args.add("-maxTokens");
         args.add("10000000");
@@ -135,7 +141,10 @@ public class TikaEvalCLITest extends TikaTest {
         args.add("100000");
 
         args.add("-db");
-        args.add(compareDBDir.toAbsolutePath().toString()+"/"+dbName);
+        args.add(
+                ProcessUtils.escapeCommandLine(
+                        compareDBDir
+                                .toAbsolutePath().toString()+"/"+dbName));
 
         execute(args, 60000);
 
@@ -145,7 +154,9 @@ public class TikaEvalCLITest extends TikaTest {
         List<String> args = new ArrayList<>();
         args.add("Profile");
         args.add("-extracts");
-        args.add(extractsDir.resolve("extractsA").toAbsolutePath().toString());
+        args.add(ProcessUtils.escapeCommandLine(
+                extractsDir.resolve("extractsA")
+                        .toAbsolutePath().toString()));
         //add these just to confirm this info doesn't cause problems w cli
         args.add("-maxTokens");
         args.add("10000000");
@@ -155,7 +166,10 @@ public class TikaEvalCLITest extends TikaTest {
         args.add("100000");
 
         args.add("-db");
-        args.add(profileDBDir.toAbsolutePath().toString()+"/"+dbName);
+        args.add(
+                ProcessUtils.escapeCommandLine(
+                        profileDBDir
+                                .toAbsolutePath().toString()+"/"+dbName));
         execute(args, 60000);
     }
 
@@ -163,9 +177,14 @@ public class TikaEvalCLITest extends TikaTest {
         List<String> args = new ArrayList<>();
         args.add("Report");
         args.add("-db");
-        args.add(profileDBDir.toAbsolutePath().toString()+"/"+dbName);
+        args.add(
+                ProcessUtils.escapeCommandLine(
+                        profileDBDir.toAbsolutePath()
+                                .toString()+"/"+dbName));
         args.add("-rd");
-        args.add(profileReportsDir.toAbsolutePath().toString());
+        args.add(
+                ProcessUtils.escapeCommandLine(
+                        profileReportsDir.toAbsolutePath().toString()));
         execute(args, 60000);
     }
 
@@ -173,9 +192,13 @@ public class TikaEvalCLITest extends TikaTest {
         List<String> args = new ArrayList<>();
         args.add("Report");
         args.add("-db");
-        args.add(compareDBDir.toAbsolutePath().toString()+"/"+dbName);
+        args.add(
+                ProcessUtils.escapeCommandLine(
+                        compareDBDir.toAbsolutePath().toString()+"/"+dbName));
         args.add("-rd");
-        args.add(compareReportsDir.toAbsolutePath().toString());
+        args.add(
+                ProcessUtils.escapeCommandLine(
+                        compareReportsDir.toAbsolutePath().toString()));
         execute(args, 60000);
     }
 
@@ -186,11 +209,20 @@ public class TikaEvalCLITest extends TikaTest {
         List<String> args = new ArrayList<>();
         args.add("Compare");
         args.add("-extractsA");
-        args.add(extractsDir.resolve("extractsA").toAbsolutePath().toString());
+        args.add(
+                ProcessUtils.escapeCommandLine(
+                        extractsDir.resolve("extractsA")
+                                .toAbsolutePath().toString()));
         args.add("-extractsB");
-        args.add(extractsDir.resolve("extractsB").toAbsolutePath().toString());
+        args.add(
+                ProcessUtils.escapeCommandLine(
+                        extractsDir.resolve("extractsB")
+                                .toAbsolutePath().toString()));
         args.add("-db");
-        args.add(compareDBDir.toAbsolutePath().toString()+"/"+dbName);
+        args.add(
+                ProcessUtils.escapeCommandLine(
+                        compareDBDir.toAbsolutePath()
+                                .toString()+"/"+dbName));
 
         execute(args, 60000);
         //      args.add("-drop");
@@ -202,6 +234,7 @@ public class TikaEvalCLITest extends TikaTest {
         List<String> args = new ArrayList<>();
         String cp = System.getProperty("java.class.path");
         args.add("java");
+        args.add("-Djava.awt.headless=true");
         args.add("-cp");
         args.add(cp);
         args.add("org.apache.tika.eval.TikaEvalCLI");