You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2015/09/30 17:59:57 UTC
svn commit: r1706060 [1/2] - in /tika/trunk: ./
tika-app/src/main/java/org/apache/tika/cli/
tika-batch/src/main/java/org/apache/tika/batch/
tika-batch/src/main/java/org/apache/tika/batch/fs/
tika-batch/src/main/java/org/apache/tika/batch/fs/builders/ t...
Author: tallison
Date: Wed Sep 30 15:59:57 2015
New Revision: 1706060
URL: http://svn.apache.org/viewvc?rev=1706060&view=rev
Log:
TIKA-1747: migrate to Path from File in tika-batch
Added:
tika/trunk/tika-batch/src/test/java/org/apache/tika/batch/fs/FSFileResourceTest.java
tika/trunk/tika-batch/src/test/java/org/apache/tika/batch/fs/FSUtilTest.java
Modified:
tika/trunk/CHANGES.txt
tika/trunk/tika-app/src/main/java/org/apache/tika/cli/BatchCommandLineBuilder.java
tika/trunk/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java
tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/BatchProcessDriverCLI.java
tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/fs/FSBatchProcessCLI.java
tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/fs/FSDirectoryCrawler.java
tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/fs/FSFileResource.java
tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/fs/FSListCrawler.java
tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/fs/FSOutputStreamFactory.java
tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/fs/FSUtil.java
tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/fs/builders/BasicTikaFSConsumersBuilder.java
tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/fs/builders/FSCrawlerBuilder.java
tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/fs/strawman/StrawManTikaAppDriver.java
tika/trunk/tika-batch/src/main/java/org/apache/tika/util/PropsUtil.java
tika/trunk/tika-batch/src/test/java/org/apache/tika/batch/fs/BatchDriverTest.java
tika/trunk/tika-batch/src/test/java/org/apache/tika/batch/fs/BatchProcessTest.java
tika/trunk/tika-batch/src/test/java/org/apache/tika/batch/fs/FSBatchTestBase.java
tika/trunk/tika-batch/src/test/java/org/apache/tika/batch/fs/HandlerBuilderTest.java
tika/trunk/tika-batch/src/test/java/org/apache/tika/batch/fs/OutputStreamFactoryTest.java
Modified: tika/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/tika/trunk/CHANGES.txt?rev=1706060&r1=1706059&r2=1706060&view=diff
==============================================================================
--- tika/trunk/CHANGES.txt (original)
+++ tika/trunk/CHANGES.txt Wed Sep 30 15:59:57 2015
@@ -1,5 +1,10 @@
Release 1.11 - Current Development
+ * Upgraded tika-batch to use Path throughout (TIKA-1747 and
+ TIKA-1754).
+
+ * Upgraded to Path in TikaInputStream (via Yaniv Kunda) (TIKA-1744).
+
* Changed default content handler type for "/rmeta" in tika-server
to "xml" to align with "-J" option in tika-app.
Clients can now specify handler types via PathParam. (TIKA-1716).
Modified: tika/trunk/tika-app/src/main/java/org/apache/tika/cli/BatchCommandLineBuilder.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-app/src/main/java/org/apache/tika/cli/BatchCommandLineBuilder.java?rev=1706060&r1=1706059&r2=1706060&view=diff
==============================================================================
--- tika/trunk/tika-app/src/main/java/org/apache/tika/cli/BatchCommandLineBuilder.java (original)
+++ tika/trunk/tika-app/src/main/java/org/apache/tika/cli/BatchCommandLineBuilder.java Wed Sep 30 15:59:57 2015
@@ -17,8 +17,10 @@
package org.apache.tika.cli;
-import java.io.File;
import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
@@ -132,14 +134,15 @@ class BatchCommandLineBuilder {
//if there are only two args and they are both directories, treat the first
//as input and the second as output.
if (args.length == 2 && !args[0].startsWith("-") && ! args[1].startsWith("-")) {
- File candInput = new File(args[0]);
- File candOutput = new File(args[1]);
- if (candOutput.isFile()) {
+ Path candInput = Paths.get(args[0]);
+ Path candOutput = Paths.get(args[1]);
+
+ if (Files.isRegularFile(candOutput)) {
throw new IllegalArgumentException("Can't specify an existing file as the "+
"second argument for the output directory of a batch process");
}
- if (candInput.isDirectory()){
+ if (Files.isDirectory(candInput)) {
map.put("-inputDir", args[0]);
map.put("-outputDir", args[1]);
}
Modified: tika/trunk/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java?rev=1706060&r1=1706059&r2=1706060&view=diff
==============================================================================
--- tika/trunk/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java (original)
+++ tika/trunk/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java Wed Sep 30 15:59:57 2015
@@ -16,6 +16,8 @@
*/
package org.apache.tika.cli;
+import static java.nio.charset.StandardCharsets.UTF_8;
+
import javax.xml.transform.OutputKeys;
import javax.xml.transform.TransformerConfigurationException;
import javax.xml.transform.sax.SAXTransformerFactory;
@@ -40,6 +42,9 @@ import java.net.Socket;
import java.net.URI;
import java.net.URL;
import java.nio.charset.Charset;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
import java.util.Arrays;
import java.util.Comparator;
import java.util.Enumeration;
@@ -106,8 +111,6 @@ import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;
-import static java.nio.charset.StandardCharsets.UTF_8;
-
/**
* Simple command line interface for Apache Tika.
*/
@@ -650,9 +653,10 @@ public class TikaCLI {
private boolean testForBatch(String[] args) {
if (args.length == 2 && ! args[0].startsWith("-")
&& ! args[1].startsWith("-")) {
- File inputCand = new File(args[0]);
- File outputCand = new File(args[1]);
- if (inputCand.isDirectory() && !outputCand.isFile()) {
+ Path inputCand = Paths.get(args[0]);
+ Path outputCand = Paths.get(args[1]);
+ if (Files.isDirectory(inputCand) &&
+ !Files.isRegularFile(outputCand)) {
return true;
}
}
Modified: tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/BatchProcessDriverCLI.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/BatchProcessDriverCLI.java?rev=1706060&r1=1706059&r2=1706060&view=diff
==============================================================================
--- tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/BatchProcessDriverCLI.java (original)
+++ tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/BatchProcessDriverCLI.java Wed Sep 30 15:59:57 2015
@@ -16,15 +16,17 @@ package org.apache.tika.batch;
* limitations under the License.
*/
+import static java.nio.charset.StandardCharsets.UTF_8;
+
import java.io.BufferedInputStream;
import java.io.BufferedReader;
-import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.Writer;
+import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
@@ -33,8 +35,6 @@ import org.apache.commons.io.IOUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import static java.nio.charset.StandardCharsets.UTF_8;
-
public class BatchProcessDriverCLI {
/**
@@ -252,7 +252,7 @@ public class BatchProcessDriverCLI {
private void start() throws Exception {
ProcessBuilder builder = new ProcessBuilder(commandLine);
- builder.directory(new File("."));
+ builder.directory(Paths.get(".").toFile());
process = builder.start();
errorWatcher = new StreamWatcher(process.getErrorStream());
Modified: tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/fs/FSBatchProcessCLI.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/fs/FSBatchProcessCLI.java?rev=1706060&r1=1706059&r2=1706060&view=diff
==============================================================================
--- tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/fs/FSBatchProcessCLI.java (original)
+++ tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/fs/FSBatchProcessCLI.java Wed Sep 30 15:59:57 2015
@@ -17,9 +17,10 @@ package org.apache.tika.batch.fs;
* limitations under the License.
*/
-import java.io.File;
import java.io.IOException;
import java.net.URL;
+import java.nio.file.Path;
+import java.nio.file.Paths;
import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.ExecutorService;
@@ -68,7 +69,7 @@ public class FSBatchProcessCLI {
private TikaInputStream getConfigInputStream(String[] args, boolean logDefault) throws IOException {
TikaInputStream is = null;
- File batchConfigFile = getConfigFile(args);
+ Path batchConfigFile = getConfigFile(args);
if (batchConfigFile != null) {
//this will throw IOException if it can't find a specified config file
//better to throw an exception than silently back off to default.
@@ -131,12 +132,12 @@ public class FSBatchProcessCLI {
System.exit(result.getExitStatus());
}
- private File getConfigFile(String[] args) {
- File configFile = null;
+ private Path getConfigFile(String[] args) {
+ Path configFile = null;
for (int i = 0; i < args.length; i++) {
if (args[i].equals("-bc") || args[i].equals("-batch-config")) {
if (i < args.length-1) {
- configFile = new File(args[i+1]);
+ configFile = Paths.get(args[i+1]);
}
}
}
Modified: tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/fs/FSDirectoryCrawler.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/fs/FSDirectoryCrawler.java?rev=1706060&r1=1706059&r2=1706060&view=diff
==============================================================================
--- tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/fs/FSDirectoryCrawler.java (original)
+++ tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/fs/FSDirectoryCrawler.java Wed Sep 30 15:59:57 2015
@@ -16,11 +16,15 @@ package org.apache.tika.batch.fs;
* limitations under the License.
*/
-import java.io.File;
+import java.io.IOException;
+import java.nio.file.DirectoryStream;
+import java.nio.file.Files;
+import java.nio.file.Path;
import java.util.ArrayList;
-import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
+import java.util.Iterator;
+import java.util.LinkedList;
import java.util.List;
import java.util.concurrent.ArrayBlockingQueue;
@@ -36,33 +40,35 @@ public class FSDirectoryCrawler extends
OS_ORDER //operating system chooses
}
- private final File root;
- private final File startDirectory;
- private final Comparator<File> fileComparator = new FileNameComparator();
+ private final Path root;
+ private final Path startDirectory;
+ private final Comparator<Path> pathComparator = new FileNameComparator();
private CRAWL_ORDER crawlOrder;
public FSDirectoryCrawler(ArrayBlockingQueue<FileResource> fileQueue,
- int numConsumers, File root, CRAWL_ORDER crawlOrder) {
+ int numConsumers, Path root, CRAWL_ORDER crawlOrder) {
super(fileQueue, numConsumers);
this.root = root;
this.startDirectory = root;
this.crawlOrder = crawlOrder;
- if (! startDirectory.isDirectory()) {
- throw new RuntimeException("Crawler couldn't find this directory:" + startDirectory.getAbsolutePath());
+ if (!Files.isDirectory(startDirectory)) {
+ throw new RuntimeException("Crawler couldn't find this directory:" +
+ startDirectory.toAbsolutePath());
}
}
public FSDirectoryCrawler(ArrayBlockingQueue<FileResource> fileQueue,
- int numConsumers, File root, File startDirectory,
+ int numConsumers, Path root, Path startDirectory,
CRAWL_ORDER crawlOrder) {
super(fileQueue, numConsumers);
this.root = root;
this.startDirectory = startDirectory;
this.crawlOrder = crawlOrder;
- assert(FSUtil.checkThisIsAncestorOfOrSameAsThat(root, startDirectory));
- if (! startDirectory.isDirectory()) {
- throw new RuntimeException("Crawler couldn't find this directory:" + startDirectory.getAbsolutePath());
+ assert(startDirectory.toAbsolutePath().startsWith(root.toAbsolutePath()));
+
+ if (! Files.isDirectory(startDirectory)) {
+ throw new RuntimeException("Crawler couldn't find this directory:" + startDirectory.toAbsolutePath());
}
}
@@ -70,58 +76,63 @@ public class FSDirectoryCrawler extends
addFiles(startDirectory);
}
- private void addFiles(File directory) throws InterruptedException {
+ private void addFiles(Path directory) throws InterruptedException {
- if (directory == null ||
- !directory.isDirectory() || !directory.canRead()) {
- String path = "null path";
- if (directory != null) {
- path = directory.getAbsolutePath();
- }
- logger.warn("FSFileAdder can't read this directory: " + path);
+ if (directory == null) {
+ logger.warn("FSFileAdder asked to process null directory?!");
return;
}
- List<File> directories = new ArrayList<File>();
- File[] fileArr = directory.listFiles();
- if (fileArr == null) {
- logger.info("Empty directory: " + directory.getAbsolutePath());
+ List<Path> files = new ArrayList<>();
+ try (DirectoryStream ds = Files.newDirectoryStream(directory)){
+ Iterator<Path> it = ds.iterator();
+ while (it.hasNext()) {
+ files.add(it.next());
+ }
+ } catch (IOException e) {
+ logger.warn("FSFileAdder couldn't read "+directory.toAbsolutePath() +
+ ": "+e.getMessage());
+ }
+ if (files.size() == 0) {
+ logger.info("Empty directory: " + directory.toAbsolutePath());
return;
}
- List<File> files = new ArrayList<File>(Arrays.asList(fileArr));
if (crawlOrder == CRAWL_ORDER.RANDOM) {
Collections.shuffle(files);
} else if (crawlOrder == CRAWL_ORDER.SORTED) {
- Collections.sort(files, fileComparator);
+ Collections.sort(files, pathComparator);
}
int numFiles = 0;
- for (File f : files) {
+ List<Path> directories = new LinkedList<>();
+ for (Path f : files) {
if (Thread.currentThread().isInterrupted()) {
throw new InterruptedException("file adder interrupted");
}
-
- if (f.isFile()) {
- numFiles++;
- if (numFiles == 1) {
- handleFirstFileInDirectory(f);
- }
+ if (!Files.isReadable(f)) {
+ logger.warn("Skipping -- "+f.toAbsolutePath()+
+ " -- file/directory is not readable");
+ continue;
}
- if (f.isDirectory()) {
+ if (Files.isDirectory(f)) {
directories.add(f);
continue;
}
+ numFiles++;
+ if (numFiles == 1) {
+ handleFirstFileInDirectory(f);
+ }
int added = tryToAdd(new FSFileResource(root, f));
if (added == FileResourceCrawler.STOP_NOW) {
- logger.debug("crawler has hit a limit: "+f.getAbsolutePath() + " : " + added);
+ logger.debug("crawler has hit a limit: "+f.toAbsolutePath() + " : " + added);
return;
}
- logger.debug("trying to add: "+f.getAbsolutePath() + " : " + added);
+ logger.debug("trying to add: "+f.toAbsolutePath() + " : " + added);
}
- for (File f : directories) {
+ for (Path f : directories) {
addFiles(f);
}
}
@@ -135,21 +146,21 @@ public class FSDirectoryCrawler extends
*
* @param f file to handle
*/
- public void handleFirstFileInDirectory(File f) {
+ public void handleFirstFileInDirectory(Path f) {
//no-op
}
//simple lexical order for the file name, we don't really care about localization.
//we do want this, though, because file.compareTo behaves differently
//on different OS's.
- private class FileNameComparator implements Comparator<File> {
+ private class FileNameComparator implements Comparator<Path> {
@Override
- public int compare(File f1, File f2) {
+ public int compare(Path f1, Path f2) {
if (f1 == null || f2 == null) {
return 0;
}
- return f1.getName().compareTo(f2.getName());
+ return f1.getFileName().toString().compareTo(f2.getFileName().toString());
}
}
}
Modified: tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/fs/FSFileResource.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/fs/FSFileResource.java?rev=1706060&r1=1706059&r2=1706060&view=diff
==============================================================================
--- tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/fs/FSFileResource.java (original)
+++ tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/fs/FSFileResource.java Wed Sep 30 15:59:57 2015
@@ -20,6 +20,9 @@ package org.apache.tika.batch.fs;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
import java.util.Locale;
import org.apache.tika.batch.FileResource;
@@ -39,21 +42,49 @@ import org.apache.tika.metadata.Metadata
*/
public class FSFileResource implements FileResource {
- private final File fullPath;
+ private final Path fullPath;
private final String relativePath;
private final Metadata metadata;
+ /**
+ *
+ * @param inputRoot
+ * @param fullPath
+ * @see FSFileResource#FSFileResource(Path, Path)
+ * @deprecated to be removed in Tika 2.0
+ */
+ @Deprecated
public FSFileResource(File inputRoot, File fullPath) {
+ this(Paths.get(inputRoot.getAbsolutePath()),
+ Paths.get(fullPath.getAbsolutePath()));
+ }
+
+ /**
+ * Constructor
+ *
+ * @param inputRoot the input root for the file
+ * @param fullPath the full path to the file
+ * @throws IllegalArgumentException if the fullPath is not
+ * a child of inputRoot
+ */
+ public FSFileResource(Path inputRoot, Path fullPath) {
this.fullPath = fullPath;
this.metadata = new Metadata();
//child path must actually be a child
- assert(FSUtil.checkThisIsAncestorOfThat(inputRoot, fullPath));
- this.relativePath = fullPath.getAbsolutePath().substring(inputRoot.getAbsolutePath().length()+1);
+ assert(fullPath.toAbsolutePath().startsWith(inputRoot.toAbsolutePath()));
+ this.relativePath = inputRoot.relativize(fullPath).toString();
//need to set these now so that the filter can determine
//whether or not to crawl this file
- metadata.set(Metadata.RESOURCE_NAME_KEY, fullPath.getName());
- metadata.set(Metadata.CONTENT_LENGTH, Long.toString(fullPath.length()));
+ metadata.set(Metadata.RESOURCE_NAME_KEY, fullPath.getFileName().toString());
+ long sz = -1;
+ try {
+ sz = Files.size(fullPath);
+ } catch (IOException e) {
+ //swallow
+ //nonexistent file will be handled downstream
+ }
+ metadata.set(Metadata.CONTENT_LENGTH, Long.toString(sz));
metadata.set(FSProperties.FS_REL_PATH, relativePath);
metadata.set(FileResource.FILE_EXTENSION, getExtension(fullPath));
}
@@ -67,8 +98,8 @@ public class FSFileResource implements F
* @param fullPath full path from which to try to find an extension
* @return the lowercased extension or an empty string
*/
- private String getExtension(File fullPath) {
- String p = fullPath.getName();
+ private String getExtension(Path fullPath) {
+ String p = fullPath.getFileName().toString();
int i = p.lastIndexOf(".");
if (i > -1) {
return p.substring(i + 1).toLowerCase(Locale.ROOT);
Modified: tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/fs/FSListCrawler.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/fs/FSListCrawler.java?rev=1706060&r1=1706059&r2=1706060&view=diff
==============================================================================
--- tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/fs/FSListCrawler.java (original)
+++ tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/fs/FSListCrawler.java Wed Sep 30 15:59:57 2015
@@ -16,9 +16,6 @@ package org.apache.tika.batch.fs;
* limitations under the License.
*/
-import org.apache.tika.batch.FileResource;
-import org.apache.tika.batch.FileResourceCrawler;
-
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
@@ -26,23 +23,64 @@ import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
+import java.nio.charset.Charset;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
import java.util.concurrent.ArrayBlockingQueue;
+import org.apache.tika.batch.FileResource;
+import org.apache.tika.batch.FileResourceCrawler;
+
/**
* Class that "crawls" a list of files.
*/
public class FSListCrawler extends FileResourceCrawler {
private final BufferedReader reader;
- private final File root;
+ private final Path root;
+ /**
+ *
+ * @param fileQueue
+ * @param numConsumers
+ * @param root
+ * @param list
+ * @param encoding
+ * @throws FileNotFoundException
+ * @throws UnsupportedEncodingException
+ * @deprecated
+ * @see #FSListCrawler(ArrayBlockingQueue, int, Path, Path, Charset)
+ */
+ @Deprecated
public FSListCrawler(ArrayBlockingQueue<FileResource> fileQueue,
int numConsumers, File root, File list, String encoding)
throws FileNotFoundException, UnsupportedEncodingException {
super(fileQueue, numConsumers);
reader = new BufferedReader(new InputStreamReader(new FileInputStream(list), encoding));
- this.root = root;
+ this.root = Paths.get(root.toURI());
+
+ }
+ /**
+ * Constructor for a crawler that reads a list of files to process.
+ * <p>
+ * The list should be paths relative to the root.
+ *
+ * @param fileQueue queue for batch
+ * @param numConsumers number of consumers
+ * @param root root input directory
+ * @param list text file list (one file per line) of paths relative to
+ * the root for processing
+ * @param charset charset of the file
+ * @throws IOException
+ */
+ public FSListCrawler(ArrayBlockingQueue<FileResource> fileQueue,
+ int numConsumers, Path root, Path list, Charset charset)
+ throws IOException {
+ super(fileQueue, numConsumers);
+ reader = Files.newBufferedReader(list, charset);
+ this.root = root;
}
public void start() throws InterruptedException {
@@ -52,14 +90,14 @@ public class FSListCrawler extends FileR
if (Thread.currentThread().isInterrupted()) {
throw new InterruptedException("file adder interrupted");
}
- File f = new File(root, line);
- if (! f.exists()) {
- logger.warn("File doesn't exist:"+f.getAbsolutePath());
+ Path f = Paths.get(root.toString(), line);
+ if (! Files.exists(f)) {
+ logger.warn("File doesn't exist:"+f.toAbsolutePath());
line = nextLine();
continue;
}
- if (f.isDirectory()) {
- logger.warn("File is a directory:"+f.getAbsolutePath());
+ if (Files.isDirectory(f)) {
+ logger.warn("File is a directory:"+f.toAbsolutePath());
line = nextLine();
continue;
}
Modified: tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/fs/FSOutputStreamFactory.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/fs/FSOutputStreamFactory.java?rev=1706060&r1=1706059&r2=1706060&view=diff
==============================================================================
--- tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/fs/FSOutputStreamFactory.java (original)
+++ tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/fs/FSOutputStreamFactory.java Wed Sep 30 15:59:57 2015
@@ -16,10 +16,13 @@ package org.apache.tika.batch.fs;
* limitations under the License.
*/
+import java.io.BufferedOutputStream;
import java.io.File;
-import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
import java.util.zip.GZIPOutputStream;
import org.apache.commons.compress.archivers.zip.ZipArchiveOutputStream;
@@ -37,14 +40,28 @@ public class FSOutputStreamFactory imple
}
private final FSUtil.HANDLE_EXISTING handleExisting;
- private final File outputRoot;
+ private final Path outputRoot;
private final String suffix;
private final COMPRESSION compression;
+ /**
+ *
+ * @param outputRoot
+ * @param handleExisting
+ * @param compression
+ * @param suffix
+ * @see #FSOutputStreamFactory(Path, FSUtil.HANDLE_EXISTING, COMPRESSION, String)
+ */
+ @Deprecated
public FSOutputStreamFactory(File outputRoot, FSUtil.HANDLE_EXISTING handleExisting,
COMPRESSION compression, String suffix) {
+ this(Paths.get(outputRoot.toURI()),
+ handleExisting, compression, suffix);
+ }
+ public FSOutputStreamFactory(Path outputRoot, FSUtil.HANDLE_EXISTING handleExisting,
+ COMPRESSION compression, String suffix) {
this.handleExisting = handleExisting;
- this.outputRoot = outputRoot.getAbsoluteFile();
+ this.outputRoot = outputRoot;
this.suffix = suffix;
this.compression = compression;
}
@@ -68,27 +85,30 @@ public class FSOutputStreamFactory imple
@Override
public OutputStream getOutputStream(Metadata metadata) throws IOException {
String initialRelativePath = metadata.get(FSProperties.FS_REL_PATH);
- File outputFile = FSUtil.getOutputFile(outputRoot, initialRelativePath, handleExisting, suffix);
- if (outputFile == null) {
+ Path outputPath = FSUtil.getOutputPath(outputRoot, initialRelativePath, handleExisting, suffix);
+ if (outputPath == null) {
return null;
}
- if (! outputFile.getParentFile().isDirectory()) {
- boolean success = outputFile.getParentFile().mkdirs();
- //with multithreading, it is possible that the parent file was created between
- //the test and the attempt to .mkdirs(); mkdirs() returns false if the dirs already exist
- if (! success && ! outputFile.getParentFile().isDirectory()) {
- throw new IOException("Couldn't create parent directory for:"+outputFile.getAbsolutePath());
+ if (!Files.isDirectory(outputPath.getParent())) {
+ Files.createDirectories(outputPath.getParent());
+ //TODO: shouldn't need this any more in java 7, right?
+ if (! Files.isDirectory(outputPath.getParent())) {
+ throw new IOException("Couldn't create parent directory for:"+outputPath.toAbsolutePath());
}
}
- OutputStream os = new FileOutputStream(outputFile);
- if (compression == COMPRESSION.BZIP2){
- os = new BZip2CompressorOutputStream(os);
- } else if (compression == COMPRESSION.GZIP) {
- os = new GZIPOutputStream(os);
- } else if (compression == COMPRESSION.ZIP) {
- os = new ZipArchiveOutputStream(os);
+ OutputStream os = Files.newOutputStream(outputPath);
+ switch (compression) {
+ case BZIP2:
+ os = new BZip2CompressorOutputStream(os);
+ break;
+ case GZIP:
+ os = new GZIPOutputStream(os);
+ break;
+ case ZIP:
+ os = new ZipArchiveOutputStream(os);
+ break;
}
- return os;
+ return new BufferedOutputStream(os);
}
}
Modified: tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/fs/FSUtil.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/fs/FSUtil.java?rev=1706060&r1=1706059&r2=1706060&view=diff
==============================================================================
--- tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/fs/FSUtil.java (original)
+++ tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/fs/FSUtil.java Wed Sep 30 15:59:57 2015
@@ -19,6 +19,9 @@ package org.apache.tika.batch.fs;
import java.io.File;
import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
import java.util.UUID;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
@@ -29,6 +32,7 @@ import java.util.regex.Pattern;
*/
public class FSUtil {
+ @Deprecated
public static boolean checkThisIsAncestorOfThat(File ancestor, File child) {
int ancLen = ancestor.getAbsolutePath().length();
int childLen = child.getAbsolutePath().length();
@@ -41,6 +45,7 @@ public class FSUtil {
}
+ @Deprecated
public static boolean checkThisIsAncestorOfOrSameAsThat(File ancestor, File child) {
if (ancestor.equals(child)) {
return true;
@@ -79,18 +84,57 @@ public class FSUtil {
* after trying to increment the file count (e.g. fileA(2).docx) 10000 times
* and then after trying 20,000 UUIDs.
*
- * @param outputRoot directory root for output
+ * @param outputRoot directory root for output
* @param initialRelativePath initial relative path (including file name, which may be renamed)
- * @param handleExisting what to do if the output file exists
- * @param suffix suffix to add to files, can be null
+ * @param handleExisting what to do if the output file exists
+ * @param suffix suffix to add to files, can be null
* @return output file or null if no output file should be created
* @throws java.io.IOException
+ * @see #getOutputPath(Path, String, HANDLE_EXISTING, String)
*/
+ @Deprecated
public static File getOutputFile(File outputRoot, String initialRelativePath,
HANDLE_EXISTING handleExisting, String suffix) throws IOException {
+ return getOutputPath(Paths.get(outputRoot.toURI()),
+ initialRelativePath, handleExisting, suffix).toFile();
+ }
+
+ /**
+ * Given an output root and an initial relative path,
+ * return the output file according to the HANDLE_EXISTING strategy
+ * <p/>
+ * In the most basic use case, given a root directory "input",
+ * a file's relative path "dir1/dir2/fileA.docx", and an output directory
+ * "output", the output file would be "output/dir1/dir2/fileA.docx."
+ * <p/>
+ * If HANDLE_EXISTING is set to OVERWRITE, this will not check to see if the output already exists,
+ * and the returned file could overwrite an existing file!!!
+ * <p/>
+ * If HANDLE_EXISTING is set to RENAME, this will try to increment a counter at the end of
+ * the file name (fileA(2).docx) until there is a file name that doesn't exist.
+ * <p/>
+ * This will return null if handleExisting == HANDLE_EXISTING.SKIP and
+ * the candidate file already exists.
+ * <p/>
+ * This will throw an IOException if HANDLE_EXISTING is set to
+ * RENAME, and a candidate output file cannot be found
+ * after trying to increment the file count (e.g. fileA(2).docx) 10000 times
+ * and then after trying 20,000 UUIDs.
+ *
+ * @param outputRoot root directory into which to put the path
+ * @param initialRelativePath relative path including file ("somedir/subdir1/file.doc")
+ * @param handleExisting policy for what to do if the output path already exists
+ * @param suffix suffix to add to the output path
+ * @return can return null
+ * @throws IOException
+ */
+ public static Path getOutputPath(Path outputRoot, String initialRelativePath,
+ HANDLE_EXISTING handleExisting, String suffix) throws IOException {
+
String localSuffix = (suffix == null) ? "" : suffix;
- File cand = new File(outputRoot, initialRelativePath+ "." +localSuffix);
- if (cand.isFile()) {
+ Path cand = FSUtil.resolveRelative(outputRoot,
+ initialRelativePath + "." + localSuffix);
+ if (Files.exists(cand)) {
if (handleExisting.equals(HANDLE_EXISTING.OVERWRITE)) {
return cand;
} else if (handleExisting.equals(HANDLE_EXISTING.SKIP)) {
@@ -110,8 +154,9 @@ public class FSUtil {
String fNameBase = null;
String fNameExt = "";
//this doesn't include the addition of the localSuffix
- File candOnly = new File(outputRoot, initialRelativePath);
- Matcher m = FILE_NAME_PATTERN.matcher(candOnly.getName());
+ Path candOnly = FSUtil.resolveRelative(outputRoot,
+ initialRelativePath);
+ Matcher m = FILE_NAME_PATTERN.matcher(candOnly.getFileName().toString());
if (m.find()) {
fNameBase = m.group(1);
@@ -127,23 +172,40 @@ public class FSUtil {
}
}
- File outputParent = cand.getParentFile();
- while (fNameBase != null && cand.isFile() && ++cnt < 10000) {
- String candFileName = fNameBase + "(" + cnt + ")." + fNameExt+ "" +localSuffix;
- cand = new File(outputParent, candFileName);
+ Path outputParent = cand.getParent();
+ while (fNameBase != null && Files.exists(cand) && ++cnt < 10000) {
+ String candFileName = fNameBase + "(" + cnt + ")." + fNameExt + "" + localSuffix;
+ cand = FSUtil.resolveRelative(outputParent, candFileName);
}
//reset count to 0 and try 20000 times
cnt = 0;
- while (cand.isFile() && cnt++ < 20000) {
+ while (Files.exists(cand) && cnt++ < 20000) {
UUID uid = UUID.randomUUID();
- cand = new File(outputParent, uid.toString() + fNameExt+ "" +localSuffix);
+ cand = FSUtil.resolveRelative(outputParent,
+ uid.toString() + fNameExt + "" + localSuffix);
}
- if (cand.isFile()) {
+ if (Files.exists(cand)) {
throw new IOException("Couldn't find candidate output file after trying " +
"very, very hard");
}
return cand;
}
+ /**
+ * Resolves "other" against "p" after ensuring that "other" is not an
+ * absolute path. One could imagine malicious use of an absolute path here.
+ * NOTE(review): only absolute paths are rejected; nothing here checks for
+ * ".." segments or normalizes the result, so the returned path is not
+ * guaranteed to point to a location under "p" -- confirm callers are ok with that.
+ *
+ * @param p base path against which "other" is resolved
+ * @param other path string to resolve against "p"; must not be absolute
+ * @return resolved path
+ * @throws IllegalArgumentException if "other" is an absolute path
+ */
+ public static Path resolveRelative(Path p, String other) {
+ Path op = Paths.get(other);
+ if (op.isAbsolute()) {
+ throw new IllegalArgumentException(other + " cannot be an absolute path!");
+ }
+ return p.resolve(op);
+ }
}
\ No newline at end of file
Modified: tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/fs/builders/BasicTikaFSConsumersBuilder.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/fs/builders/BasicTikaFSConsumersBuilder.java?rev=1706060&r1=1706059&r2=1706060&view=diff
==============================================================================
--- tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/fs/builders/BasicTikaFSConsumersBuilder.java (original)
+++ tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/fs/builders/BasicTikaFSConsumersBuilder.java Wed Sep 30 15:59:57 2015
@@ -17,7 +17,10 @@
package org.apache.tika.batch.fs.builders;
-import java.io.File;
+import java.io.InputStream;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
@@ -86,8 +89,8 @@ public class BasicTikaFSConsumersBuilder
}
}
if (tikaConfigPath != null) {
- try {
- config = new TikaConfig(new File(tikaConfigPath));
+ try (InputStream is = Files.newInputStream(Paths.get(tikaConfigPath))) {
+ config = new TikaConfig(is);
} catch (Exception e) {
throw new RuntimeException(e);
}
@@ -166,7 +169,7 @@ public class BasicTikaFSConsumersBuilder
private OutputStreamFactory getOutputStreamFactory(Node node, Map<String, String> runtimeAttributes) {
Map<String, String> attrs = XMLDOMUtil.mapifyAttrs(node, runtimeAttributes);
- File outputDir = PropsUtil.getFile(attrs.get("outputDir"), null);
+ Path outputDir = PropsUtil.getPath(attrs.get("outputDir"), null);
/* FSUtil.HANDLE_EXISTING handleExisting = null;
String handleExistingString = attrs.get("handleExisting");
if (handleExistingString == null) {
@@ -194,7 +197,7 @@ public class BasicTikaFSConsumersBuilder
}
String suffix = attrs.get("outputSuffix");
- //TODO: possibly open up the different handle existings in the future
+ //TODO: possibly open up the different handle-existings in the future
//but for now, lock it down to require skip. Too dangerous otherwise
//if the driver restarts and this is set to overwrite...
return new FSOutputStreamFactory(outputDir, FSUtil.HANDLE_EXISTING.SKIP,
Modified: tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/fs/builders/FSCrawlerBuilder.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/fs/builders/FSCrawlerBuilder.java?rev=1706060&r1=1706059&r2=1706060&view=diff
==============================================================================
--- tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/fs/builders/FSCrawlerBuilder.java (original)
+++ tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/fs/builders/FSCrawlerBuilder.java Wed Sep 30 15:59:57 2015
@@ -18,7 +18,12 @@ package org.apache.tika.batch.fs.builder
*/
-import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.io.UnsupportedEncodingException;
+import java.nio.charset.Charset;
+import java.nio.file.Path;
+import java.nio.file.Paths;
import java.util.Locale;
import java.util.Map;
import java.util.concurrent.ArrayBlockingQueue;
@@ -30,6 +35,7 @@ import org.apache.tika.batch.builders.Ba
import org.apache.tika.batch.builders.ICrawlerBuilder;
import org.apache.tika.batch.fs.FSDirectoryCrawler;
import org.apache.tika.batch.fs.FSDocumentSelector;
+import org.apache.tika.batch.fs.FSListCrawler;
import org.apache.tika.extractor.DocumentSelector;
import org.apache.tika.util.PropsUtil;
import org.apache.tika.util.XMLDOMUtil;
@@ -62,7 +68,8 @@ public class FSCrawlerBuilder implements
Map<String, String> attributes = XMLDOMUtil.mapifyAttrs(node, runtimeAttributes);
int numConsumers = BatchProcessBuilder.getNumConsumers(runtimeAttributes);
- File inputDir = PropsUtil.getFile(attributes.get(INPUT_DIR_ATTR), new File("input"));
+ Path inputDir = PropsUtil.getPath(attributes.get(INPUT_DIR_ATTR),
+ Paths.get("input"));
FileResourceCrawler crawler = null;
if (attributes.containsKey("fileList")) {
String randomCrawlString = attributes.get(CRAWL_ORDER);
@@ -71,18 +78,23 @@ public class FSCrawlerBuilder implements
//TODO: change to logger warn or throw RuntimeException?
System.err.println("randomCrawl attribute is ignored by FSListCrawler");
}
- File fileList = PropsUtil.getFile(attributes.get("fileList"), null);
- String encoding = PropsUtil.getString(attributes.get("fileListEncoding"), "UTF-8");
+ Path fileList = PropsUtil.getPath(attributes.get("fileList"), null);
+ String encodingString = PropsUtil.getString(attributes.get("fileListEncoding"), "UTF-8");
+
try {
- crawler = new org.apache.tika.batch.fs.FSListCrawler(queue, numConsumers, inputDir, fileList, encoding);
- } catch (java.io.FileNotFoundException e) {
- throw new RuntimeException("fileList file not found for FSListCrawler: " + fileList.getAbsolutePath());
- } catch (java.io.UnsupportedEncodingException e) {
- throw new RuntimeException("fileList encoding not supported: "+encoding);
+ Charset encoding = Charset.forName(encodingString);
+ crawler = new FSListCrawler(queue, numConsumers, inputDir, fileList, encoding);
+ } catch (FileNotFoundException e) {
+ throw new RuntimeException("fileList file not found for FSListCrawler: " +
+ fileList.toAbsolutePath());
+ } catch (UnsupportedEncodingException e) {
+ throw new RuntimeException("fileList encoding not supported: "+encodingString);
+ } catch (IOException e) {
+ throw new RuntimeException("IOException while trying to open fileList: " + e.getMessage());
}
} else {
FSDirectoryCrawler.CRAWL_ORDER crawlOrder = getCrawlOrder(attributes.get(CRAWL_ORDER));
- File startDir = PropsUtil.getFile(attributes.get(INPUT_START_DIR_ATTR), null);
+ Path startDir = PropsUtil.getPath(attributes.get(INPUT_START_DIR_ATTR), null);
if (startDir == null) {
crawler = new FSDirectoryCrawler(queue, numConsumers, inputDir, crawlOrder);
} else {
Modified: tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/fs/strawman/StrawManTikaAppDriver.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/fs/strawman/StrawManTikaAppDriver.java?rev=1706060&r1=1706059&r2=1706060&view=diff
==============================================================================
--- tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/fs/strawman/StrawManTikaAppDriver.java (original)
+++ tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/fs/strawman/StrawManTikaAppDriver.java Wed Sep 30 15:59:57 2015
@@ -17,11 +17,15 @@ package org.apache.tika.batch.fs.strawma
* limitations under the License.
*/
-import java.io.File;
-import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
+import java.nio.file.FileVisitResult;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.nio.file.SimpleFileVisitor;
+import java.nio.file.attribute.BasicFileAttributes;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Date;
@@ -53,111 +57,107 @@ public class StrawManTikaAppDriver imple
private static AtomicInteger threadCount = new AtomicInteger(0);
private final int totalThreads;
private final int threadNum;
- private int rootLen = -1;
- private File inputDir = null;
- private File outputDir = null;
+ private Path inputRoot = null;
+ private Path outputRoot = null;
private String[] args = null;
private Logger logger = LoggerFactory.getLogger(StrawManTikaAppDriver.class);
- public StrawManTikaAppDriver(File inputDir, File outputDir, int totalThreads, String[] args) {
- rootLen = inputDir.getAbsolutePath().length()+1;
- this.inputDir = inputDir;
- this.outputDir = outputDir;
+ public StrawManTikaAppDriver(Path inputRoot, Path outputRoot,
+ int totalThreads, String[] args) {
+ this.inputRoot = inputRoot;
+ this.outputRoot = outputRoot;
this.args = args;
threadNum = threadCount.getAndIncrement();
this.totalThreads = totalThreads;
}
- private int processDirectory(File inputDir) {
- int processed = 0;
- if (inputDir == null || inputDir.listFiles() == null) {
- return processed;
- }
- for (File f : inputDir.listFiles()) {
- List<File> childDirs = new ArrayList<File>();
- if (f.isDirectory()) {
- childDirs.add(f);
- } else {
- processed += processFile(f);
- }
- for (File dir : childDirs) {
- processed += processDirectory(dir);
+ private class TikaVisitor extends SimpleFileVisitor<Path> {
+ private int processed = 0;
- }
+ int getProcessed() {
+ return processed;
}
- return processed;
- }
-
- private int processFile(File f) {
- if (totalThreads > 1) {
- int hashCode = f.getAbsolutePath().hashCode();
- if (Math.abs(hashCode % totalThreads) != threadNum) {
- return 0;
+ @Override
+ public FileVisitResult visitFile(Path file,
+ BasicFileAttributes attr) {
+ if (totalThreads > 1) {
+ int hashCode = file.toAbsolutePath().toString().hashCode();
+ if (Math.abs(hashCode % totalThreads) != threadNum) {
+ return FileVisitResult.CONTINUE;
+ }
}
- }
- File outputFile = new File(outputDir, f.getAbsolutePath().substring(rootLen)+".txt");
- outputFile.getAbsoluteFile().getParentFile().mkdirs();
- if (! outputFile.getParentFile().exists()) {
- logger.error(MarkerFactory.getMarker("FATAL"),
- "parent directory for "+ outputFile + " was not made!");
- throw new RuntimeException("couldn't make parent file for " + outputFile);
- }
- List<String> commandLine = new ArrayList<String>();
- for (String arg : args) {
- commandLine.add(arg);
- }
- commandLine.add("-t");
- commandLine.add("\""+f.getAbsolutePath()+"\"");
- ProcessBuilder builder = new ProcessBuilder(commandLine.toArray(new String[commandLine.size()]));
- logger.info("about to process: "+f.getAbsolutePath());
- Process proc = null;
- RedirectGobbler gobbler = null;
- Thread gobblerThread = null;
- try {
- OutputStream os = new FileOutputStream(outputFile);
- proc = builder.start();
- gobbler = new RedirectGobbler(proc.getInputStream(), os);
- gobblerThread = new Thread(gobbler);
- gobblerThread.start();
- } catch (IOException e) {
- logger.error(e.getMessage());
- return 0;
- }
-
- boolean finished = false;
- long totalTime = 180000;//3 minutes
- long pulse = 100;
- for (int i = 0; i < totalTime; i += pulse) {
+ assert(file.startsWith(inputRoot));
+ Path relPath = inputRoot.relativize(file);
+ Path outputFile = Paths.get(outputRoot.toAbsolutePath().toString(),
+ relPath.toString() + ".txt");
try {
- Thread.currentThread().sleep(pulse);
- } catch (InterruptedException e) {
- //swallow
- }
+ Files.createDirectories(outputFile.getParent());
+ } catch (IOException e) {
+ logger.error(MarkerFactory.getMarker("FATAL"),
+ "parent directory for "+ outputFile + " was not made!");
+ throw new RuntimeException("couldn't make parent file for " + outputFile);
+ }
+ List<String> commandLine = new ArrayList<>();
+ for (String arg : args) {
+ commandLine.add(arg);
+ }
+ commandLine.add("-t");
+ commandLine.add("\""+outputFile.toAbsolutePath()+"\"");
+ ProcessBuilder builder = new ProcessBuilder(commandLine.toArray(new String[commandLine.size()]));
+ logger.info("about to process: "+file.toAbsolutePath());
+ Process proc = null;
+ RedirectGobbler gobbler = null;
+ Thread gobblerThread = null;
try {
- int exit = proc.exitValue();
- finished = true;
- break;
- } catch (IllegalThreadStateException e) {
- //swallow
+ OutputStream os = Files.newOutputStream(outputFile);
+ proc = builder.start();
+ gobbler = new RedirectGobbler(proc.getInputStream(), os);
+ gobblerThread = new Thread(gobbler);
+ gobblerThread.start();
+ } catch (IOException e) {
+ logger.error(e.getMessage());
+ return FileVisitResult.CONTINUE;
+ }
+
+ boolean finished = false;
+ long totalTime = 180000;//3 minutes
+ long pulse = 100;
+ for (int i = 0; i < totalTime; i += pulse) {
+ try {
+ Thread.currentThread().sleep(pulse);
+ } catch (InterruptedException e) {
+ //swallow
+ }
+ try {
+ int exit = proc.exitValue();
+ finished = true;
+ break;
+ } catch (IllegalThreadStateException e) {
+ //swallow
+ }
}
+ if (!finished) {
+ logger.warn("Had to kill process working on: " + file.toAbsolutePath());
+ proc.destroy();
+ }
+ gobbler.close();
+ gobblerThread.interrupt();
+ processed++;
+ return FileVisitResult.CONTINUE;
}
- if (!finished) {
- logger.warn("Had to kill process working on: " + f.getAbsolutePath());
- proc.destroy();
- }
- gobbler.close();
- gobblerThread.interrupt();
- return 1;
+
}
+
@Override
public Integer call() throws Exception {
long start = new Date().getTime();
-
- int processed = processDirectory(inputDir);
+ TikaVisitor v = new TikaVisitor();
+ Files.walkFileTree(inputRoot, v);
+ int processed = v.getProcessed();
double elapsedSecs = ((double)new Date().getTime()-(double)start)/(double)1000;
logger.info("Finished processing " + processed + " files in " + elapsedSecs + " seconds.");
return processed;
@@ -202,6 +202,8 @@ public class StrawManTikaAppDriver imple
}
}
+
+
public static String usage() {
StringBuilder sb = new StringBuilder();
sb.append("Example usage:\n");
@@ -216,8 +218,8 @@ public class StrawManTikaAppDriver imple
if (args.length < 6) {
System.err.println(StrawManTikaAppDriver.usage());
}
- File inputDir = new File(args[0]);
- File outputDir = new File(args[1]);
+ Path inputDir = Paths.get(args[0]);
+ Path outputDir = Paths.get(args[1]);
int totalThreads = Integer.parseInt(args[2]);
List<String> commandLine = new ArrayList<String>();
Modified: tika/trunk/tika-batch/src/main/java/org/apache/tika/util/PropsUtil.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-batch/src/main/java/org/apache/tika/util/PropsUtil.java?rev=1706060&r1=1706059&r2=1706060&view=diff
==============================================================================
--- tika/trunk/tika-batch/src/main/java/org/apache/tika/util/PropsUtil.java (original)
+++ tika/trunk/tika-batch/src/main/java/org/apache/tika/util/PropsUtil.java Wed Sep 30 15:59:57 2015
@@ -18,6 +18,8 @@ package org.apache.tika.util;
*/
import java.io.File;
+import java.nio.file.Path;
+import java.nio.file.Paths;
import java.util.Locale;
/**
@@ -91,7 +93,9 @@ public class PropsUtil {
* @param v string to parse
* @param defaultMissing value to return if value is null or unparseable
* @return parsed value
+ * @see #getPath(String, Path)
*/
+ @Deprecated
public static File getFile(String v, File defaultMissing) {
if (v == null || v.length() == 0) {
return defaultMissing;
@@ -120,4 +124,26 @@ public class PropsUtil {
}
return v;
}
+
+ /**
+ * Converts v to a Path. If v is null or empty, this returns defaultMissing.
+ * Otherwise, one leading and one trailing double quote are stripped (if
+ * present) and the remainder is handed to Paths.get.
+ * NOTE(review): nothing in this method catches a failure from Paths.get
+ * (e.g. InvalidPathException), so an unparseable string is not mapped to
+ * defaultMissing.
+ *
+ * @param v string to parse
+ * @param defaultMissing value to return if v is null or empty
+ * @return parsed value, or defaultMissing if v is null or empty
+ * @see #getFile(String, File)
+ */
+ public static Path getPath(String v, Path defaultMissing) {
+ if (v == null || v.length() == 0) {
+ return defaultMissing;
+ }
+ //trim initial and final " if they exist
+ if (v.startsWith("\"")) {
+ v = v.substring(1);
+ }
+ if (v.endsWith("\"")) {
+ v = v.substring(0, v.length()-1);
+ }
+ return Paths.get(v);
+ }
}
Modified: tika/trunk/tika-batch/src/test/java/org/apache/tika/batch/fs/BatchDriverTest.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-batch/src/test/java/org/apache/tika/batch/fs/BatchDriverTest.java?rev=1706060&r1=1706059&r2=1706060&view=diff
==============================================================================
--- tika/trunk/tika-batch/src/test/java/org/apache/tika/batch/fs/BatchDriverTest.java (original)
+++ tika/trunk/tika-batch/src/test/java/org/apache/tika/batch/fs/BatchDriverTest.java Wed Sep 30 15:59:57 2015
@@ -20,15 +20,14 @@ package org.apache.tika.batch.fs;
import static java.nio.charset.StandardCharsets.UTF_8;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
-import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;
-import java.io.File;
+import java.nio.file.Files;
+import java.nio.file.Path;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;
-import org.apache.commons.io.FileUtils;
import org.apache.tika.batch.BatchProcessDriverCLI;
import org.junit.Test;
@@ -41,28 +40,30 @@ public class BatchDriverTest extends FSB
@Test(timeout = 15000)
public void oneHeavyHangTest() throws Exception {
//batch runner hits one heavy hang file, keep going
- File outputDir = getNewOutputDir("daemon-");
- assertNotNull(outputDir.listFiles());
+ Path outputDir = getNewOutputDir("daemon-");
+ assertTrue(Files.isDirectory(outputDir));
//make sure output directory is empty!
- assertEquals(0, outputDir.listFiles().length);
+ assertEquals(0, countChildren(outputDir));
String[] args = getDefaultCommandLineArgsArr("one_heavy_hang", outputDir, null);
BatchProcessDriverCLI driver = getNewDriver("/tika-batch-config-test.xml", args);
driver.execute();
+
assertEquals(0, driver.getNumRestarts());
assertFalse(driver.getUserInterrupted());
- assertEquals(5, outputDir.listFiles().length);
+ assertEquals(5, countChildren(outputDir));
+
assertContains("first test file",
- FileUtils.readFileToString(new File(outputDir, "test2_ok.xml.xml"), UTF_8));
+ readFileToString(outputDir.resolve("test2_ok.xml.xml"), UTF_8));
}
@Test(timeout = 30000)
public void restartOnFullHangTest() throws Exception {
//batch runner hits more heavy hangs than threads; needs to restart
- File outputDir = getNewOutputDir("daemon-");
+ Path outputDir = getNewOutputDir("daemon-");
//make sure output directory is empty!
- assertEquals(0, outputDir.listFiles().length);
+ assertEquals(0, countChildren(outputDir));
String[] args = getDefaultCommandLineArgsArr("heavy_heavy_hangs", outputDir, null);
BatchProcessDriverCLI driver = getNewDriver("/tika-batch-config-test.xml", args);
@@ -71,15 +72,15 @@ public class BatchDriverTest extends FSB
assertTrue(driver.getNumRestarts() > 0);
assertFalse(driver.getUserInterrupted());
assertContains("first test file",
- FileUtils.readFileToString(new File(outputDir, "test6_ok.xml.xml"), UTF_8));
+ readFileToString(outputDir.resolve("test6_ok.xml.xml"), UTF_8));
}
@Test(timeout = 15000)
public void noRestartTest() throws Exception {
- File outputDir = getNewOutputDir("daemon-");
+ Path outputDir = getNewOutputDir("daemon-");
//make sure output directory is empty!
- assertEquals(0, outputDir.listFiles().length);
+ assertEquals(0, countChildren(outputDir));
String[] args = getDefaultCommandLineArgsArr("no_restart", outputDir, null);
String[] mod = Arrays.copyOf(args, args.length + 2);
@@ -90,22 +91,20 @@ public class BatchDriverTest extends FSB
driver.execute();
assertEquals(0, driver.getNumRestarts());
assertFalse(driver.getUserInterrupted());
- File[] files = outputDir.listFiles();
- assertEquals(2, files.length);
- File test2 = new File(outputDir, "test2_norestart.xml.xml");
- assertTrue("test2_norestart.xml", test2.exists());
- File test3 = new File(outputDir, "test3_ok.xml.xml");
- assertFalse("test3_ok.xml", test3.exists());
- assertEquals(0, test3.length());
+ assertEquals(2, countChildren(outputDir));
+ Path test2 = outputDir.resolve("test2_norestart.xml.xml");
+ assertTrue("test2_norestart.xml", Files.exists(test2));
+ Path test3 = outputDir.resolve("test3_ok.xml.xml");
+ assertFalse("test3_ok.xml", Files.exists(test3));
}
@Test(timeout = 15000)
public void restartOnOOMTest() throws Exception {
//batch runner hits more heavy hangs than threads; needs to restart
- File outputDir = getNewOutputDir("daemon-");
+ Path outputDir = getNewOutputDir("daemon-");
//make sure output directory is empty!
- assertEquals(0, outputDir.listFiles().length);
+ assertEquals(0, countChildren(outputDir));
String[] args = getDefaultCommandLineArgsArr("oom", outputDir, null);
BatchProcessDriverCLI driver = getNewDriver("/tika-batch-config-test.xml", args);
@@ -113,7 +112,7 @@ public class BatchDriverTest extends FSB
assertEquals(1, driver.getNumRestarts());
assertFalse(driver.getUserInterrupted());
assertContains("first test file",
- FileUtils.readFileToString(new File(outputDir, "test2_ok.xml.xml"), UTF_8));
+ readFileToString(outputDir.resolve("test2_ok.xml.xml"), UTF_8));
}
@Test(timeout = 30000)
@@ -121,8 +120,8 @@ public class BatchDriverTest extends FSB
//this tests that if all consumers are hung and the crawler is
//waiting to add to the queue, there isn't deadlock. The BatchProcess should
//just shutdown, and the driver should restart
- File outputDir = getNewOutputDir("allHeavyHangsStarvedCrawler-");
- Map<String, String> args = new HashMap<String,String>();
+ Path outputDir = getNewOutputDir("allHeavyHangsStarvedCrawler-");
+ Map<String, String> args = new HashMap<>();
args.put("-numConsumers", "2");
args.put("-maxQueueSize", "2");
String[] commandLine = getDefaultCommandLineArgsArr("heavy_heavy_hangs", outputDir, args);
@@ -131,7 +130,7 @@ public class BatchDriverTest extends FSB
assertEquals(3, driver.getNumRestarts());
assertFalse(driver.getUserInterrupted());
assertContains("first test file",
- FileUtils.readFileToString(new File(outputDir, "test6_ok.xml.xml"), UTF_8));
+ readFileToString(outputDir.resolve("test6_ok.xml.xml"), UTF_8));
}
@Test(timeout = 30000)
@@ -140,8 +139,8 @@ public class BatchDriverTest extends FSB
//if -maxRestarts is not correctly removed from the commandline,
//FSBatchProcessCLI's cli parser will throw an Unrecognized option exception
- File outputDir = getNewOutputDir("allHeavyHangsStarvedCrawler-");
- Map<String, String> args = new HashMap<String,String>();
+ Path outputDir = getNewOutputDir("allHeavyHangsStarvedCrawler-");
+ Map<String, String> args = new HashMap<>();
args.put("-numConsumers", "1");
args.put("-maxQueueSize", "10");
args.put("-maxRestarts", "2");
@@ -152,14 +151,14 @@ public class BatchDriverTest extends FSB
driver.execute();
assertEquals(2, driver.getNumRestarts());
assertFalse(driver.getUserInterrupted());
- assertEquals(3, outputDir.listFiles().length);
+ assertEquals(3, countChildren(outputDir));
}
@Test(timeout = 30000)
public void maxRestartsBadParameter() throws Exception {
//tests that maxRestarts must be followed by an Integer
- File outputDir = getNewOutputDir("allHeavyHangsStarvedCrawler-");
- Map<String, String> args = new HashMap<String,String>();
+ Path outputDir = getNewOutputDir("allHeavyHangsStarvedCrawler-");
+ Map<String, String> args = new HashMap<>();
args.put("-numConsumers", "1");
args.put("-maxQueueSize", "10");
args.put("-maxRestarts", "zebra");
@@ -180,22 +179,22 @@ public class BatchDriverTest extends FSB
//tests that if something goes horribly wrong with FSBatchProcessCLI
//the driver will not restart it again and again
//this calls a bad xml file which should trigger a no restart exit.
- File outputDir = getNewOutputDir("nostart-norestart-");
- Map<String, String> args = new HashMap<String,String>();
+ Path outputDir = getNewOutputDir("nostart-norestart-");
+ Map<String, String> args = new HashMap<>();
args.put("-numConsumers", "1");
args.put("-maxQueueSize", "10");
String[] commandLine = getDefaultCommandLineArgsArr("basic", outputDir, args);
BatchProcessDriverCLI driver = getNewDriver("/tika-batch-config-broken.xml", commandLine);
driver.execute();
- assertEquals(0, outputDir.listFiles().length);
+ assertEquals(0, countChildren(outputDir));
assertEquals(0, driver.getNumRestarts());
}
@Test(timeout = 30000)
public void testNoRestartIfProcessFailsTake2() throws Exception {
- File outputDir = getNewOutputDir("nostart-norestart-");
- Map<String, String> args = new HashMap<String,String>();
+ Path outputDir = getNewOutputDir("nostart-norestart-");
+ Map<String, String> args = new HashMap<>();
args.put("-numConsumers", "1");
args.put("-maxQueueSize", "10");
args.put("-somethingOrOther", "I don't Know");
@@ -203,7 +202,7 @@ public class BatchDriverTest extends FSB
String[] commandLine = getDefaultCommandLineArgsArr("basic", outputDir, args);
BatchProcessDriverCLI driver = getNewDriver("/tika-batch-config-test.xml", commandLine);
driver.execute();
- assertEquals(0, outputDir.listFiles().length);
+ assertEquals(0, countChildren(outputDir));
assertEquals(0, driver.getNumRestarts());
}
Modified: tika/trunk/tika-batch/src/test/java/org/apache/tika/batch/fs/BatchProcessTest.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-batch/src/test/java/org/apache/tika/batch/fs/BatchProcessTest.java?rev=1706060&r1=1706059&r2=1706060&view=diff
==============================================================================
--- tika/trunk/tika-batch/src/test/java/org/apache/tika/batch/fs/BatchProcessTest.java (original)
+++ tika/trunk/tika-batch/src/test/java/org/apache/tika/batch/fs/BatchProcessTest.java Wed Sep 30 15:59:57 2015
@@ -23,11 +23,13 @@ import static org.junit.Assert.assertFal
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
-import java.io.File;
import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.util.List;
import java.util.Map;
-import org.apache.commons.io.FileUtils;
import org.apache.tika.batch.BatchProcess;
import org.apache.tika.batch.BatchProcessDriverCLI;
import org.junit.Test;
@@ -37,15 +39,15 @@ public class BatchProcessTest extends FS
@Test(timeout = 15000)
public void oneHeavyHangTest() throws Exception {
- File outputDir = getNewOutputDir("one_heavy_hang-");
+ Path outputDir = getNewOutputDir("one_heavy_hang-");
Map<String, String> args = getDefaultArgs("one_heavy_hang", outputDir);
BatchProcessTestExecutor ex = new BatchProcessTestExecutor(args);
StreamStrings streamStrings = ex.execute();
- assertEquals(5, outputDir.listFiles().length);
- File hvyHang = new File(outputDir, "test0_heavy_hang.xml.xml");
- assertTrue(hvyHang.exists());
- assertEquals(0, hvyHang.length());
+ assertEquals(5, countChildren(outputDir));
+ Path hvyHang = outputDir.resolve("test0_heavy_hang.xml.xml");
+ assertTrue(Files.exists(hvyHang));
+ assertEquals(0, Files.size(hvyHang));
assertNotContained(BatchProcess.BATCH_CONSTANTS.BATCH_PROCESS_FATAL_MUST_RESTART.toString(),
streamStrings.getErrString());
}
@@ -55,16 +57,17 @@ public class BatchProcessTest extends FS
public void allHeavyHangsTest() throws Exception {
//each of the three threads hits a heavy hang. The BatchProcess runs into
//all timedouts and shuts down.
- File outputDir = getNewOutputDir("allHeavyHangs-");
+ Path outputDir = getNewOutputDir("allHeavyHangs-");
Map<String, String> args = getDefaultArgs("heavy_heavy_hangs", outputDir);
BatchProcessTestExecutor ex = new BatchProcessTestExecutor(args);
StreamStrings streamStrings = ex.execute();
- assertEquals(3, outputDir.listFiles().length);
- for (File hvyHang : outputDir.listFiles()){
- assertTrue(hvyHang.exists());
- assertEquals("file length for "+hvyHang.getName()+" should be 0, but is: " +hvyHang.length(),
- 0, hvyHang.length());
+ assertEquals(3, countChildren(outputDir));
+ for (Path hvyHang : listPaths(outputDir)){
+ assertTrue(Files.exists(hvyHang));
+ assertEquals("file length for "+hvyHang.getFileName()+" should be 0, but is: " +
+ Files.size(hvyHang),
+ 0, Files.size(hvyHang));
}
assertContains(BatchProcess.BATCH_CONSTANTS.BATCH_PROCESS_FATAL_MUST_RESTART.toString(),
streamStrings.getErrString());
@@ -72,20 +75,20 @@ public class BatchProcessTest extends FS
@Test(timeout = 30000)
public void allHeavyHangsTestWithCrazyNumberConsumersTest() throws Exception {
- File outputDir = getNewOutputDir("allHeavyHangsCrazyNumberConsumers-");
+ Path outputDir = getNewOutputDir("allHeavyHangsCrazyNumberConsumers-");
Map<String, String> args = getDefaultArgs("heavy_heavy_hangs", outputDir);
args.put("numConsumers", "100");
BatchProcessTestExecutor ex = new BatchProcessTestExecutor(args);
StreamStrings streamStrings = ex.execute();
- assertEquals(7, outputDir.listFiles().length);
+ assertEquals(7, countChildren(outputDir));
for (int i = 0; i < 6; i++){
- File hvyHang = new File(outputDir, "test"+i+"_heavy_hang.xml.xml");
- assertTrue(hvyHang.exists());
- assertEquals(0, hvyHang.length());
+ Path hvyHang = outputDir.resolve("test"+i+"_heavy_hang.xml.xml");
+ assertTrue(Files.exists(hvyHang));
+ assertEquals(0, Files.size(hvyHang));
}
assertContains("This is tika-batch's first test file",
- FileUtils.readFileToString(new File(outputDir, "test6_ok.xml.xml"), UTF_8));
+ readFileToString(outputDir.resolve("test6_ok.xml.xml"), UTF_8));
//key that the process realize that there were no more processable files
//in the queue and does not ask for a restart!
@@ -98,19 +101,19 @@ public class BatchProcessTest extends FS
//this tests that if all consumers are hung and the crawler is
//waiting to add to the queue, there isn't deadlock. The batchrunner should
//shutdown and ask to be restarted.
- File outputDir = getNewOutputDir("allHeavyHangsStarvedCrawler-");
+ Path outputDir = getNewOutputDir("allHeavyHangsStarvedCrawler-");
Map<String, String> args = getDefaultArgs("heavy_heavy_hangs", outputDir);
args.put("numConsumers", "2");
args.put("maxQueueSize", "2");
args.put("timeoutThresholdMillis", "100000000");//make sure that the batch process doesn't time out
BatchProcessTestExecutor ex = new BatchProcessTestExecutor(args);
StreamStrings streamStrings = ex.execute();
- assertEquals(2, outputDir.listFiles().length);
+ assertEquals(2, countChildren(outputDir));
for (int i = 0; i < 2; i++){
- File hvyHang = new File(outputDir, "test"+i+"_heavy_hang.xml.xml");
- assertTrue(hvyHang.exists());
- assertEquals(0, hvyHang.length());
+ Path hvyHang = outputDir.resolve("test"+i+"_heavy_hang.xml.xml");
+ assertTrue(Files.exists(hvyHang));
+ assertEquals(0, Files.size(hvyHang));
}
assertContains(BatchProcess.BATCH_CONSTANTS.BATCH_PROCESS_FATAL_MUST_RESTART.toString(),
streamStrings.getErrString());
@@ -125,7 +128,7 @@ public class BatchProcessTest extends FS
//no consumers should process test2-4.txt!
//i.e. the first consumer will finish in 10 seconds and
//then otherwise would be looking for more, but the oom should prevent that
- File outputDir = getNewOutputDir("oom-");
+ Path outputDir = getNewOutputDir("oom-");
Map<String, String> args = getDefaultArgs("oom", outputDir);
args.put("numConsumers", "3");
@@ -134,9 +137,9 @@ public class BatchProcessTest extends FS
BatchProcessTestExecutor ex = new BatchProcessTestExecutor(args);
StreamStrings streamStrings = ex.execute();
- assertEquals(4, outputDir.listFiles().length);
+ assertEquals(4, countChildren(outputDir));
assertContains("This is tika-batch's first test file",
- FileUtils.readFileToString(new File(outputDir, "test2_ok.xml.xml"), UTF_8));
+ readFileToString(outputDir.resolve("test2_ok.xml.xml"), UTF_8));
assertContains(BatchProcess.BATCH_CONSTANTS.BATCH_PROCESS_FATAL_MUST_RESTART.toString(),
streamStrings.getErrString());
@@ -146,7 +149,7 @@ public class BatchProcessTest extends FS
@Test(timeout = 15000)
public void noRestart() throws Exception {
- File outputDir = getNewOutputDir("no_restart");
+ Path outputDir = getNewOutputDir("no_restart");
Map<String, String> args = getDefaultArgs("no_restart", outputDir);
args.put("numConsumers", "1");
@@ -154,12 +157,11 @@ public class BatchProcessTest extends FS
BatchProcessTestExecutor ex = new BatchProcessTestExecutor(args);
StreamStrings streamStrings = ex.execute();
- File[] files = outputDir.listFiles();
- File test2 = new File(outputDir, "test2_norestart.xml.xml");
- assertTrue("test2_norestart.xml", test2.exists());
- File test3 = new File(outputDir, "test3_ok.xml.xml");
- assertFalse("test3_ok.xml", test3.exists());
- assertEquals(0, test3.length());
+
+ Path test2 = outputDir.resolve("test2_norestart.xml.xml");
+ assertTrue("test2_norestart.xml", Files.exists(test2));
+ Path test3 = outputDir.resolve("test3_ok.xml.xml");
+ assertFalse("test3_ok.xml", Files.exists(test3));
assertContains("exitStatus="+ BatchProcessDriverCLI.PROCESS_NO_RESTART_EXIT_CODE,
streamStrings.getOutString());
assertContains("causeForTermination='MAIN_LOOP_EXCEPTION_NO_RESTART'",
@@ -175,7 +177,7 @@ public class BatchProcessTest extends FS
*/
@Test(timeout = 60000)
public void testWaitAfterEarlyTermination() throws Exception {
- File outputDir = getNewOutputDir("wait_after_early_termination");
+ Path outputDir = getNewOutputDir("wait_after_early_termination");
Map<String, String> args = getDefaultArgs("wait_after_early_termination", outputDir);
args.put("numConsumers", "1");
@@ -186,19 +188,19 @@ public class BatchProcessTest extends FS
BatchProcessTestExecutor ex = new BatchProcessTestExecutor(args);
StreamStrings streamStrings = ex.execute();
- File[] files = outputDir.listFiles();
- assertEquals(1, files.length);
+ assertEquals(1, countChildren(outputDir));
assertContains("<p>some content</p>",
- FileUtils.readFileToString(new File(outputDir, "test0_sleep.xml.xml"), UTF_8));
+ readFileToString(outputDir.resolve("test0_sleep.xml.xml"), UTF_8));
- assertContains("exitStatus="+BatchProcessDriverCLI.PROCESS_RESTART_EXIT_CODE, streamStrings.getOutString());
+ assertContains("exitStatus="+BatchProcessDriverCLI.PROCESS_RESTART_EXIT_CODE,
+ streamStrings.getOutString());
assertContains("causeForTermination='BATCH_PROCESS_ALIVE_TOO_LONG'",
streamStrings.getOutString());
}
@Test(timeout = 60000)
public void testTimeOutAfterBeingAskedToShutdown() throws Exception {
- File outputDir = getNewOutputDir("timeout_after_early_termination");
+ Path outputDir = getNewOutputDir("timeout_after_early_termination");
Map<String, String> args = getDefaultArgs("timeout_after_early_termination", outputDir);
args.put("numConsumers", "1");
@@ -208,9 +210,9 @@ public class BatchProcessTest extends FS
BatchProcessTestExecutor ex = new BatchProcessTestExecutor(args);
StreamStrings streamStrings = ex.execute();
- File[] files = outputDir.listFiles();
- assertEquals(1, files.length);
- assertEquals(0, files[0].length());
+ List<Path> paths = listPaths(outputDir);
+ assertEquals(1, paths.size());
+ assertEquals(0, Files.size(paths.get(0)));
assertContains("exitStatus="+BatchProcessDriverCLI.PROCESS_RESTART_EXIT_CODE, streamStrings.getOutString());
assertContains("causeForTermination='BATCH_PROCESS_ALIVE_TOO_LONG'",
streamStrings.getOutString());
@@ -219,7 +221,7 @@ public class BatchProcessTest extends FS
@Test(timeout = 10000)
public void testRedirectionOfStreams() throws Exception {
//test redirection of system.err to system.out
- File outputDir = getNewOutputDir("noisy_parsers");
+ Path outputDir = getNewOutputDir("noisy_parsers");
Map<String, String> args = getDefaultArgs("noisy_parsers", outputDir);
args.put("numConsumers", "1");
@@ -227,8 +229,7 @@ public class BatchProcessTest extends FS
BatchProcessTestExecutor ex = new BatchProcessTestExecutor(args);
StreamStrings streamStrings = ex.execute();
- File[] files = outputDir.listFiles();
- assertEquals(1, files.length);
+ assertEquals(1, countChildren(outputDir));
assertContains("System.out", streamStrings.getOutString());
assertContains("System.err", streamStrings.getOutString());
assertEquals(0, streamStrings.getErrString().length());
@@ -237,7 +238,7 @@ public class BatchProcessTest extends FS
@Test(timeout = 10000)
public void testConsumersManagerInitHang() throws Exception {
- File outputDir = getNewOutputDir("init_hang");
+ Path outputDir = getNewOutputDir("init_hang");
Map<String, String> args = getDefaultArgs("noisy_parsers", outputDir);
args.put("numConsumers", "1");
@@ -250,7 +251,7 @@ public class BatchProcessTest extends FS
@Test(timeout = 10000)
public void testConsumersManagerShutdownHang() throws Exception {
- File outputDir = getNewOutputDir("shutdown_hang");
+ Path outputDir = getNewOutputDir("shutdown_hang");
Map<String, String> args = getDefaultArgs("noisy_parsers", outputDir);
args.put("numConsumers", "1");
@@ -267,26 +268,27 @@ public class BatchProcessTest extends FS
//tests to make sure that hierarchy is maintained when reading from
//file list
//also tests that list actually works.
- File outputDir = getNewOutputDir("hierarchical_file_list");
+ Path outputDir = getNewOutputDir("hierarchical_file_list");
Map<String, String> args = getDefaultArgs("hierarchical", outputDir);
args.put("numConsumers", "1");
- args.put("fileList", this.getClass().getResource("/testFileList.txt").getPath());
+ args.put("fileList",
+ Paths.get(this.getClass().getResource("/testFileList.txt").toURI()).toString());
args.put("recursiveParserWrapper", "true");
args.put("basicHandlerType", "text");
args.put("outputSuffix", "json");
BatchProcessTestExecutor ex = new BatchProcessTestExecutor(args, "/tika-batch-config-MockConsumersBuilder.xml");
ex.execute();
- File test1 = new File(outputDir, "test1.xml.json");
- File test2 = new File(outputDir, "sub1a/test2.xml.json");
- File test3 = new File(outputDir, "sub1a/sub2a/test3.xml.json");
- assertTrue("test1 exists", test1.exists());
- assertTrue("test1 length > 10", test1.length() > 10);
- assertTrue(test3.exists() && test3.length() > 10);
- File test2Dir = new File(outputDir, "sub1a");
+ Path test1 = outputDir.resolve("test1.xml.json");
+ Path test2 = outputDir.resolve("sub1a/test2.xml.json");
+ Path test3 = outputDir.resolve("sub1a/sub2a/test3.xml.json");
+ assertTrue("test1 exists", Files.exists(test1));
+ assertTrue("test1 length > 10", Files.size(test1) > 10);
+ assertTrue(Files.exists(test3) && Files.size(test3) > 10);
+ Path test2Dir = outputDir.resolve("sub1a");
//should be just the subdirectory, no actual test2 file
- assertEquals(1, test2Dir.listFiles().length);
- assertFalse(test2.exists());
+ assertEquals(1, countChildren(test2Dir));
+ assertFalse(Files.exists(test2));
}
private class BatchProcessTestExecutor {