You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2015/09/30 17:59:57 UTC
svn commit: r1706060 [2/2] - in /tika/trunk: ./
tika-app/src/main/java/org/apache/tika/cli/
tika-batch/src/main/java/org/apache/tika/batch/
tika-batch/src/main/java/org/apache/tika/batch/fs/
tika-batch/src/main/java/org/apache/tika/batch/fs/builders/ t...
Modified: tika/trunk/tika-batch/src/test/java/org/apache/tika/batch/fs/FSBatchTestBase.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-batch/src/test/java/org/apache/tika/batch/fs/FSBatchTestBase.java?rev=1706060&r1=1706059&r2=1706060&view=diff
==============================================================================
--- tika/trunk/tika-batch/src/test/java/org/apache/tika/batch/fs/FSBatchTestBase.java (original)
+++ tika/trunk/tika-batch/src/test/java/org/apache/tika/batch/fs/FSBatchTestBase.java Wed Sep 30 15:59:57 2015
@@ -17,11 +17,21 @@ package org.apache.tika.batch.fs;
* limitations under the License.
*/
-import java.io.File;
+import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
+import java.net.URISyntaxException;
+import java.nio.charset.Charset;
+import java.nio.file.DirectoryStream;
+import java.nio.file.FileVisitResult;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.nio.file.SimpleFileVisitor;
+import java.nio.file.attribute.BasicFileAttributes;
import java.util.ArrayList;
import java.util.HashMap;
+import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ExecutorService;
@@ -29,7 +39,6 @@ import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
-import org.apache.commons.io.FileUtils;
import org.apache.commons.io.IOUtils;
import org.apache.tika.TikaTest;
import org.apache.tika.batch.BatchProcess;
@@ -56,17 +65,13 @@ import org.junit.BeforeClass;
*/
public abstract class FSBatchTestBase extends TikaTest {
- private static File outputRoot = null;
+ private static Path outputRoot = null;
@BeforeClass
public static void setUp() throws Exception {
-
- File testOutput = new File("target/test-classes/test-output");
- testOutput.mkdirs();
- outputRoot = File.createTempFile("tika-batch-output-root-", "", testOutput);
- outputRoot.delete();
- outputRoot.mkdirs();
-
+ Path testOutput = Paths.get("target/test-classes/test-output");
+ Files.createDirectories(testOutput);
+ outputRoot = Files.createTempDirectory(testOutput, "tika-batch-output-root-");
}
@AfterClass
@@ -75,7 +80,7 @@ public abstract class FSBatchTestBase ex
//see caveat in TikaCLITest's textExtract
try {
- FileUtils.deleteDirectory(outputRoot);
+ deleteDirectory(outputRoot);
} catch (IOException e) {
e.printStackTrace();
}
@@ -92,32 +97,33 @@ public abstract class FSBatchTestBase ex
}
}
- File getNewOutputDir(String subdirPrefix) throws IOException {
- File outputDir = File.createTempFile(subdirPrefix, "", outputRoot);
- outputDir.delete();
- outputDir.mkdirs();
+ Path getNewOutputDir(String subdirPrefix) throws IOException {
+ Path outputDir = Files.createTempDirectory(outputRoot, subdirPrefix);
+ assert(countChildren(outputDir) == 0);
return outputDir;
}
- Map<String, String> getDefaultArgs(String inputSubDir, File outputDir) throws Exception {
- Map<String, String> args = new HashMap<String, String>();
- args.put("inputDir", "\""+getInputRoot(inputSubDir).getAbsolutePath()+"\"");
+ Map<String, String> getDefaultArgs(String inputSubDir, Path outputDir) throws Exception {
+ Map<String, String> args = new HashMap<>();
+
+ args.put("inputDir", "\""+getInputRoot(inputSubDir).toString()+"\"");
if (outputDir != null) {
- args.put("outputDir", "\""+outputDir.getAbsolutePath()+"\"");
+ args.put("outputDir", "\""+outputDir.toString()+"\"");
}
return args;
}
- public String[] getDefaultCommandLineArgsArr(String inputSubDir, File outputDir, Map<String, String> commandLine) throws Exception {
- List<String> args = new ArrayList<String>();
+ public String[] getDefaultCommandLineArgsArr(String inputSubDir,
+ Path outputDir, Map<String, String> commandLine) throws Exception {
+ List<String> args = new ArrayList<>();
//need to include "-" because these are going to the commandline!
if (inputSubDir != null) {
args.add("-inputDir");
- args.add(getInputRoot(inputSubDir).getAbsolutePath());
+ args.add(getInputRoot(inputSubDir).toAbsolutePath().toString());
}
if (outputDir != null) {
args.add("-outputDir");
- args.add(outputDir.getAbsolutePath());
+ args.add(outputDir.toAbsolutePath().toString());
}
if (commandLine != null) {
for (Map.Entry<String, String> e : commandLine.entrySet()) {
@@ -129,9 +135,9 @@ public abstract class FSBatchTestBase ex
}
- public File getInputRoot(String subdir) throws Exception {
+ public Path getInputRoot(String subdir) throws Exception {
String path = (subdir == null || subdir.length() == 0) ? "/test-input" : "/test-input/"+subdir;
- return new File(this.getClass().getResource(path).toURI());
+ return Paths.get(this.getClass().getResource(path).toURI());
}
BatchProcess getNewBatchRunner(String testConfig,
@@ -145,18 +151,19 @@ public abstract class FSBatchTestBase ex
}
public ProcessBuilder getNewBatchRunnerProcess(String testConfig, Map<String, String> args) {
- List<String> argList = new ArrayList<String>();
+ List<String> argList = new ArrayList<>();
for (Map.Entry<String, String> e : args.entrySet()) {
argList.add("-"+e.getKey());
argList.add(e.getValue());
}
- String[] fullCommandLine = commandLine(testConfig, argList.toArray(new String[argList.size()]));
+ String[] fullCommandLine = commandLine(testConfig,
+ argList.toArray(new String[argList.size()]));
return new ProcessBuilder(fullCommandLine);
}
private String[] commandLine(String testConfig, String[] args) {
- List<String> commandLine = new ArrayList<String>();
+ List<String> commandLine = new ArrayList<>();
commandLine.add("java");
commandLine.add("-Dlog4j.configuration=file:"+
this.getClass().getResource("/log4j_process.properties").getFile());
@@ -171,9 +178,14 @@ public abstract class FSBatchTestBase ex
commandLine.add(cp);
commandLine.add("org.apache.tika.batch.fs.FSBatchProcessCLI");
- String configFile = this.getClass().getResource(testConfig).getFile();
- commandLine.add("-bc");
+ String configFile = null;
+ try {
+ configFile = Paths.get(this.getClass().getResource(testConfig).toURI()).toAbsolutePath().toString();
+ } catch (URISyntaxException e) {
+ e.printStackTrace();
+ }
+ commandLine.add("-bc");
commandLine.add(configFile);
for (String s : args) {
@@ -184,7 +196,7 @@ public abstract class FSBatchTestBase ex
public BatchProcessDriverCLI getNewDriver(String testConfig,
String[] args) throws Exception {
- List<String> commandLine = new ArrayList<String>();
+ List<String> commandLine = new ArrayList<>();
commandLine.add("java");
commandLine.add("-Xmx128m");
commandLine.add("-cp");
@@ -197,7 +209,8 @@ public abstract class FSBatchTestBase ex
commandLine.add(cp);
commandLine.add("org.apache.tika.batch.fs.FSBatchProcessCLI");
- String configFile = this.getClass().getResource(testConfig).getFile();
+ String configFile = Paths.get(
+ this.getClass().getResource(testConfig).toURI()).toAbsolutePath().toString();
commandLine.add("-bc");
commandLine.add(configFile);
@@ -217,4 +230,72 @@ public abstract class FSBatchTestBase ex
Future<ParallelFileProcessingResult> futureResult = executor.submit(process);
return futureResult.get(10, TimeUnit.SECONDS);
}
+
+    /**
+     * Returns how many entries sit directly inside the given directory.
+     * A subdirectory counts as a single entry; its contents are not visited.
+     *
+     * @param p directory whose immediate children are counted
+     * @return number of immediate children of {@code p}
+     * @throws IOException if the directory stream cannot be opened or closed
+     */
+    public static int countChildren(Path p) throws IOException {
+        int total = 0;
+        try (DirectoryStream<Path> children = Files.newDirectoryStream(p)) {
+            for (Path ignored : children) {
+                total++;
+            }
+        }
+        return total;
+    }
+
+    /**
+     * Reads a file line by line and returns its content as one string.
+     * Every line read, including the last one, is followed by "\n" in the
+     * result, whatever line separator the file itself uses.
+     * <p>
+     * TODO: remove this and use FileUtils, once a java 7 option has been added.
+     *
+     * @param p file to read
+     * @param cs charset used to decode the file
+     * @return the lines of the file, each terminated by "\n"
+     * @throws IOException if the file cannot be opened or read
+     */
+    public static String readFileToString(Path p, Charset cs) throws IOException {
+        StringBuilder content = new StringBuilder();
+        try (BufferedReader reader = Files.newBufferedReader(p, cs)) {
+            for (String line = reader.readLine(); line != null; line = reader.readLine()) {
+                content.append(line);
+                content.append("\n");
+            }
+        }
+        return content.toString();
+    }
+
+    //TODO: move this into FileUtils
+    /**
+     * Deletes {@code dir} together with all files and directories beneath it.
+     * Files are deleted as they are visited; each directory is deleted after
+     * its entries have been visited.
+     *
+     * @param dir root of the tree to delete
+     * @throws IOException if an entry cannot be deleted, or if listing a
+     *         directory failed part-way through
+     */
+    public static void deleteDirectory(Path dir) throws IOException {
+        Files.walkFileTree(dir, new SimpleFileVisitor<Path>() {
+            @Override
+            public FileVisitResult visitFile(Path file,
+                    BasicFileAttributes attrs) throws IOException {
+                Files.delete(file);
+                return FileVisitResult.CONTINUE;
+            }
+
+            @Override
+            public FileVisitResult postVisitDirectory(Path dir,
+                    IOException exc) throws IOException {
+                if (exc != null) {
+                    //the iteration of this directory ended prematurely:
+                    //surface the real cause rather than dropping it or
+                    //hiding it behind a failed delete of a non-empty directory
+                    throw exc;
+                }
+                Files.delete(dir);
+                return FileVisitResult.CONTINUE;
+            }
+
+        });
+    }
+
+    /**
+     * Path-based helper in the spirit of File#listFiles(): collects the
+     * entries found directly in the given directory, without descending
+     * into subdirectories.
+     *
+     * @param p directory to list
+     * @return the immediate children of {@code p}, in the order the
+     *         directory stream yields them
+     * @throws IOException if the directory stream cannot be opened or closed
+     */
+    public static List<Path> listPaths(Path p) throws IOException {
+        List<Path> children = new ArrayList<>();
+        try (DirectoryStream<Path> stream = Files.newDirectoryStream(p)) {
+            for (Path child : stream) {
+                children.add(child);
+            }
+        }
+        return children;
+    }
}
Added: tika/trunk/tika-batch/src/test/java/org/apache/tika/batch/fs/FSFileResourceTest.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-batch/src/test/java/org/apache/tika/batch/fs/FSFileResourceTest.java?rev=1706060&view=auto
==============================================================================
--- tika/trunk/tika-batch/src/test/java/org/apache/tika/batch/fs/FSFileResourceTest.java (added)
+++ tika/trunk/tika-batch/src/test/java/org/apache/tika/batch/fs/FSFileResourceTest.java Wed Sep 30 15:59:57 2015
@@ -0,0 +1,49 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.batch.fs;
+
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+
+import java.nio.file.Path;
+import java.nio.file.Paths;
+
+import org.junit.Test;
+
+public class FSFileResourceTest {
+    /**
+     * Checks that FSFileResource rejects a path that is not located under
+     * the given root, and that for a genuine child the resource id is the
+     * path relative to the root.
+     */
+    @Test
+    public void testRelativization() throws Exception {
+        //test assertion error if alleged child is not actually child
+        //NOTE(review): presumably FSFileResource checks this with a Java
+        //"assert", in which case this test needs -ea -- confirm
+        Path root = Paths.get("root/abc/def").toAbsolutePath();
+        Path allegedChild = Paths.get(root.getParent().getParent().toAbsolutePath().toString());
+        //record the outcome in a flag and assert afterwards: fail() itself
+        //throws AssertionError, so calling it inside the try would be
+        //swallowed by the catch and the check could never fail
+        boolean assertionError = false;
+        try {
+            new FSFileResource(root, allegedChild);
+        } catch (AssertionError e) {
+            assertionError = true;
+        }
+        assertTrue("should have had assertion error: alleged child not actually child of root",
+                assertionError);
+
+        //test regular workings
+        root = Paths.get("root/abc/def");
+        Path child = Paths.get(root.toString(), "ghi/jkl/lmnop.doc");
+        FSFileResource r = new FSFileResource(root, child);
+        String id = r.getResourceId();
+        assertTrue(id.startsWith("ghi"));
+        assertTrue(id.endsWith("lmnop.doc"));
+    }
+}
Added: tika/trunk/tika-batch/src/test/java/org/apache/tika/batch/fs/FSUtilTest.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-batch/src/test/java/org/apache/tika/batch/fs/FSUtilTest.java?rev=1706060&view=auto
==============================================================================
--- tika/trunk/tika-batch/src/test/java/org/apache/tika/batch/fs/FSUtilTest.java (added)
+++ tika/trunk/tika-batch/src/test/java/org/apache/tika/batch/fs/FSUtilTest.java Wed Sep 30 15:59:57 2015
@@ -0,0 +1,49 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.batch.fs;
+
+import static org.junit.Assert.assertTrue;
+
+import java.nio.file.Path;
+import java.nio.file.Paths;
+
+import org.junit.Test;
+
+public class FSUtilTest {
+
+    /**
+     * Hands a Windows-style and a Unix-style path to FSUtil.resolveRelative
+     * and passes as soon as at least one of the two calls is rejected with
+     * an IllegalArgumentException.
+     */
+    @Test
+    public void testSafeResolution() throws Exception {
+        Path cwd = Paths.get(".");
+        String[] candidates = {"C:/temp/file.txt", "/root/dir/file.txt"};
+        boolean rejected = false;
+        for (String candidate : candidates) {
+            try {
+                FSUtil.resolveRelative(cwd, candidate);
+            } catch (IllegalArgumentException e) {
+                rejected = true;
+            }
+        }
+
+        assertTrue("IllegalArgumentException should have been thrown", rejected);
+    }
+
+}
Modified: tika/trunk/tika-batch/src/test/java/org/apache/tika/batch/fs/HandlerBuilderTest.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-batch/src/test/java/org/apache/tika/batch/fs/HandlerBuilderTest.java?rev=1706060&r1=1706059&r2=1706060&view=diff
==============================================================================
--- tika/trunk/tika-batch/src/test/java/org/apache/tika/batch/fs/HandlerBuilderTest.java (original)
+++ tika/trunk/tika-batch/src/test/java/org/apache/tika/batch/fs/HandlerBuilderTest.java Wed Sep 30 15:59:57 2015
@@ -21,10 +21,9 @@ import static java.nio.charset.StandardC
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
-import java.io.File;
+import java.nio.file.Path;
import java.util.Map;
-import org.apache.commons.io.FileUtils;
import org.apache.tika.batch.BatchProcess;
import org.apache.tika.batch.ParallelFileProcessingResult;
import org.junit.Test;
@@ -34,15 +33,15 @@ public class HandlerBuilderTest extends
@Test
public void testXML() throws Exception {
- File outputDir = getNewOutputDir("handler-xml-");
+ Path outputDir = getNewOutputDir("handler-xml-");
Map<String, String> args = getDefaultArgs("basic", outputDir);
args.put("basicHandlerType", "xml");
args.put("outputSuffix", "xml");
BatchProcess runner = getNewBatchRunner("/tika-batch-config-test.xml", args);
ParallelFileProcessingResult result = run(runner);
- File outputFile = new File(outputDir, "test0.xml.xml");
- String resultString = FileUtils.readFileToString(outputFile, UTF_8);
+ Path outputFile = outputDir.resolve("test0.xml.xml");
+ String resultString = readFileToString(outputFile, UTF_8);
assertTrue(resultString.contains("<html xmlns=\"http://www.w3.org/1999/xhtml\">"));
assertTrue(resultString.contains("<?xml version=\"1.0\" encoding=\"UTF-8\"?>"));
assertTrue(resultString.contains("This is tika-batch's first test file"));
@@ -51,15 +50,15 @@ public class HandlerBuilderTest extends
@Test
public void testHTML() throws Exception {
- File outputDir = getNewOutputDir("handler-html-");
+ Path outputDir = getNewOutputDir("handler-html-");
Map<String, String> args = getDefaultArgs("basic", outputDir);
args.put("basicHandlerType", "html");
args.put("outputSuffix", "html");
BatchProcess runner = getNewBatchRunner("/tika-batch-config-test.xml", args);
ParallelFileProcessingResult result = run(runner);
- File outputFile = new File(outputDir, "test0.xml.html");
- String resultString = FileUtils.readFileToString(outputFile, UTF_8);
+ Path outputFile = outputDir.resolve("test0.xml.html");
+ String resultString = readFileToString(outputFile, UTF_8);
assertTrue(resultString.contains("<html xmlns=\"http://www.w3.org/1999/xhtml\">"));
assertFalse(resultString.contains("<?xml version=\"1.0\" encoding=\"UTF-8\"?>"));
assertTrue(resultString.contains("This is tika-batch's first test file"));
@@ -67,7 +66,7 @@ public class HandlerBuilderTest extends
@Test
public void testText() throws Exception {
- File outputDir = getNewOutputDir("handler-txt-");
+ Path outputDir = getNewOutputDir("handler-txt-");
Map<String, String> args = getDefaultArgs("basic", outputDir);
args.put("basicHandlerType", "txt");
@@ -75,8 +74,8 @@ public class HandlerBuilderTest extends
BatchProcess runner = getNewBatchRunner("/tika-batch-config-test.xml", args);
ParallelFileProcessingResult result = run(runner);
- File outputFile = new File(outputDir, "test0.xml.txt");
- String resultString = FileUtils.readFileToString(outputFile, UTF_8);
+ Path outputFile = outputDir.resolve("test0.xml.txt");
+ String resultString = readFileToString(outputFile, UTF_8);
assertFalse(resultString.contains("<html xmlns=\"http://www.w3.org/1999/xhtml\">"));
assertFalse(resultString.contains("<?xml version=\"1.0\" encoding=\"UTF-8\"?>"));
assertTrue(resultString.contains("This is tika-batch's first test file"));
@@ -85,7 +84,7 @@ public class HandlerBuilderTest extends
@Test
public void testXMLWithWriteLimit() throws Exception {
- File outputDir = getNewOutputDir("handler-xml-write-limit-");
+ Path outputDir = getNewOutputDir("handler-xml-write-limit-");
Map<String, String> args = getDefaultArgs("basic", outputDir);
args.put("writeLimit", "5");
@@ -93,8 +92,8 @@ public class HandlerBuilderTest extends
BatchProcess runner = getNewBatchRunner("/tika-batch-config-test.xml", args);
ParallelFileProcessingResult result = run(runner);
- File outputFile = new File(outputDir, "test0.xml.xml");
- String resultString = FileUtils.readFileToString(outputFile, UTF_8);
+ Path outputFile = outputDir.resolve("test0.xml.xml");
+ String resultString = readFileToString(outputFile, UTF_8);
//this is not ideal. How can we change handlers to writeout whatever
//they've gotten so far, up to the writeLimit?
assertTrue(resultString.equals(""));
@@ -102,7 +101,7 @@ public class HandlerBuilderTest extends
@Test
public void testRecursiveParserWrapper() throws Exception {
- File outputDir = getNewOutputDir("handler-recursive-parser");
+ Path outputDir = getNewOutputDir("handler-recursive-parser");
Map<String, String> args = getDefaultArgs("basic", outputDir);
args.put("basicHandlerType", "txt");
@@ -111,8 +110,8 @@ public class HandlerBuilderTest extends
BatchProcess runner = getNewBatchRunner("/tika-batch-config-test.xml", args);
ParallelFileProcessingResult result = run(runner);
- File outputFile = new File(outputDir, "test0.xml.json");
- String resultString = FileUtils.readFileToString(outputFile, UTF_8);
+ Path outputFile = outputDir.resolve("test0.xml.json");
+ String resultString = readFileToString(outputFile, UTF_8);
assertTrue(resultString.contains("\"author\":\"Nikolai Lobachevsky\""));
assertTrue(resultString.contains("tika-batch\\u0027s first test file"));
}
Modified: tika/trunk/tika-batch/src/test/java/org/apache/tika/batch/fs/OutputStreamFactoryTest.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-batch/src/test/java/org/apache/tika/batch/fs/OutputStreamFactoryTest.java?rev=1706060&r1=1706059&r2=1706060&view=diff
==============================================================================
--- tika/trunk/tika-batch/src/test/java/org/apache/tika/batch/fs/OutputStreamFactoryTest.java (original)
+++ tika/trunk/tika-batch/src/test/java/org/apache/tika/batch/fs/OutputStreamFactoryTest.java Wed Sep 30 15:59:57 2015
@@ -20,7 +20,7 @@ package org.apache.tika.batch.fs;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
-import java.io.File;
+import java.nio.file.Path;
import java.util.Map;
import java.util.concurrent.ExecutionException;
@@ -33,11 +33,11 @@ public class OutputStreamFactoryTest ext
@Test
public void testIllegalState() throws Exception {
- File outputDir = getNewOutputDir("os-factory-illegal-state-");
+ Path outputDir = getNewOutputDir("os-factory-illegal-state-");
Map<String, String> args = getDefaultArgs("basic", outputDir);
BatchProcess runner = getNewBatchRunner("/tika-batch-config-test.xml", args);
run(runner);
- assertEquals(1, outputDir.listFiles().length);
+ assertEquals(1, countChildren(outputDir));
boolean illegalState = false;
try {
@@ -52,16 +52,16 @@ public class OutputStreamFactoryTest ext
@Test
public void testSkip() throws Exception {
- File outputDir = getNewOutputDir("os-factory-skip-");
+ Path outputDir = getNewOutputDir("os-factory-skip-");
Map<String, String> args = getDefaultArgs("basic", outputDir);
args.put("handleExisting", "skip");
BatchProcess runner = getNewBatchRunner("/tika-batch-config-test.xml", args);
ParallelFileProcessingResult result = run(runner);
- assertEquals(1, outputDir.listFiles().length);
+ assertEquals(1, countChildren(outputDir));
runner = getNewBatchRunner("/tika-batch-config-test.xml", args);
result = run(runner);
- assertEquals(1, outputDir.listFiles().length);
+ assertEquals(1, countChildren(outputDir));
}
/* turn this back on if there is any need to add "handleExisting"