You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2015/09/30 17:59:57 UTC

svn commit: r1706060 [1/2] - in /tika/trunk: ./ tika-app/src/main/java/org/apache/tika/cli/ tika-batch/src/main/java/org/apache/tika/batch/ tika-batch/src/main/java/org/apache/tika/batch/fs/ tika-batch/src/main/java/org/apache/tika/batch/fs/builders/ t...

Author: tallison
Date: Wed Sep 30 15:59:57 2015
New Revision: 1706060

URL: http://svn.apache.org/viewvc?rev=1706060&view=rev
Log:
TIKA-1747: migrate to Path from File in tika-batch

Added:
    tika/trunk/tika-batch/src/test/java/org/apache/tika/batch/fs/FSFileResourceTest.java
    tika/trunk/tika-batch/src/test/java/org/apache/tika/batch/fs/FSUtilTest.java
Modified:
    tika/trunk/CHANGES.txt
    tika/trunk/tika-app/src/main/java/org/apache/tika/cli/BatchCommandLineBuilder.java
    tika/trunk/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java
    tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/BatchProcessDriverCLI.java
    tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/fs/FSBatchProcessCLI.java
    tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/fs/FSDirectoryCrawler.java
    tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/fs/FSFileResource.java
    tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/fs/FSListCrawler.java
    tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/fs/FSOutputStreamFactory.java
    tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/fs/FSUtil.java
    tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/fs/builders/BasicTikaFSConsumersBuilder.java
    tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/fs/builders/FSCrawlerBuilder.java
    tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/fs/strawman/StrawManTikaAppDriver.java
    tika/trunk/tika-batch/src/main/java/org/apache/tika/util/PropsUtil.java
    tika/trunk/tika-batch/src/test/java/org/apache/tika/batch/fs/BatchDriverTest.java
    tika/trunk/tika-batch/src/test/java/org/apache/tika/batch/fs/BatchProcessTest.java
    tika/trunk/tika-batch/src/test/java/org/apache/tika/batch/fs/FSBatchTestBase.java
    tika/trunk/tika-batch/src/test/java/org/apache/tika/batch/fs/HandlerBuilderTest.java
    tika/trunk/tika-batch/src/test/java/org/apache/tika/batch/fs/OutputStreamFactoryTest.java

Modified: tika/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/tika/trunk/CHANGES.txt?rev=1706060&r1=1706059&r2=1706060&view=diff
==============================================================================
--- tika/trunk/CHANGES.txt (original)
+++ tika/trunk/CHANGES.txt Wed Sep 30 15:59:57 2015
@@ -1,5 +1,10 @@
 Release 1.11 - Current Development
 
+  * Upgraded tika-batch to use Path throughout (TIKA-1747 and
+    TIKA-1754).
+
+  * Upgraded to Path in TikaInputStream (via Yaniv Kunda) (TIKA-1744).
+
   * Changed default content handler type for "/rmeta" in tika-server
     to "xml" to align with "-J" option in tika-app.  
     Clients can now specify handler types via PathParam. (TIKA-1716).

Modified: tika/trunk/tika-app/src/main/java/org/apache/tika/cli/BatchCommandLineBuilder.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-app/src/main/java/org/apache/tika/cli/BatchCommandLineBuilder.java?rev=1706060&r1=1706059&r2=1706060&view=diff
==============================================================================
--- tika/trunk/tika-app/src/main/java/org/apache/tika/cli/BatchCommandLineBuilder.java (original)
+++ tika/trunk/tika-app/src/main/java/org/apache/tika/cli/BatchCommandLineBuilder.java Wed Sep 30 15:59:57 2015
@@ -17,8 +17,10 @@
 
 package org.apache.tika.cli;
 
-import java.io.File;
 import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
 import java.util.ArrayList;
 import java.util.LinkedHashMap;
 import java.util.List;
@@ -132,14 +134,15 @@ class BatchCommandLineBuilder {
         //if there are only two args and they are both directories, treat the first
         //as input and the second as output.
         if (args.length == 2 && !args[0].startsWith("-") && ! args[1].startsWith("-")) {
-            File candInput = new File(args[0]);
-            File candOutput = new File(args[1]);
-            if (candOutput.isFile()) {
+            Path candInput = Paths.get(args[0]);
+            Path candOutput = Paths.get(args[1]);
+
+            if (Files.isRegularFile(candOutput)) {
                 throw new IllegalArgumentException("Can't specify an existing file as the "+
                 "second argument for the output directory of a batch process");
             }
 
-            if (candInput.isDirectory()){
+            if (Files.isDirectory(candInput)) {
                 map.put("-inputDir", args[0]);
                 map.put("-outputDir", args[1]);
             }

Modified: tika/trunk/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java?rev=1706060&r1=1706059&r2=1706060&view=diff
==============================================================================
--- tika/trunk/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java (original)
+++ tika/trunk/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java Wed Sep 30 15:59:57 2015
@@ -16,6 +16,8 @@
  */
 package org.apache.tika.cli;
 
+import static java.nio.charset.StandardCharsets.UTF_8;
+
 import javax.xml.transform.OutputKeys;
 import javax.xml.transform.TransformerConfigurationException;
 import javax.xml.transform.sax.SAXTransformerFactory;
@@ -40,6 +42,9 @@ import java.net.Socket;
 import java.net.URI;
 import java.net.URL;
 import java.nio.charset.Charset;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
 import java.util.Arrays;
 import java.util.Comparator;
 import java.util.Enumeration;
@@ -106,8 +111,6 @@ import org.xml.sax.ContentHandler;
 import org.xml.sax.SAXException;
 import org.xml.sax.helpers.DefaultHandler;
 
-import static java.nio.charset.StandardCharsets.UTF_8;
-
 /**
  * Simple command line interface for Apache Tika.
  */
@@ -650,9 +653,10 @@ public class TikaCLI {
     private boolean testForBatch(String[] args) {
         if (args.length == 2 && ! args[0].startsWith("-")
                 && ! args[1].startsWith("-")) {
-            File inputCand = new File(args[0]);
-            File outputCand = new File(args[1]);
-            if (inputCand.isDirectory() && !outputCand.isFile()) {
+            Path inputCand = Paths.get(args[0]);
+            Path outputCand = Paths.get(args[1]);
+            if (Files.isDirectory(inputCand) &&
+                    !Files.isRegularFile(outputCand)) {
                 return true;
             }
         }

Modified: tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/BatchProcessDriverCLI.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/BatchProcessDriverCLI.java?rev=1706060&r1=1706059&r2=1706060&view=diff
==============================================================================
--- tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/BatchProcessDriverCLI.java (original)
+++ tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/BatchProcessDriverCLI.java Wed Sep 30 15:59:57 2015
@@ -16,15 +16,17 @@ package org.apache.tika.batch;
  * limitations under the License.
  */
 
+import static java.nio.charset.StandardCharsets.UTF_8;
+
 import java.io.BufferedInputStream;
 import java.io.BufferedReader;
-import java.io.File;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.InputStreamReader;
 import java.io.OutputStream;
 import java.io.OutputStreamWriter;
 import java.io.Writer;
+import java.nio.file.Paths;
 import java.util.ArrayList;
 import java.util.List;
 import java.util.Locale;
@@ -33,8 +35,6 @@ import org.apache.commons.io.IOUtils;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-import static java.nio.charset.StandardCharsets.UTF_8;
-
 public class BatchProcessDriverCLI {
 
     /**
@@ -252,7 +252,7 @@ public class BatchProcessDriverCLI {
 
     private void start() throws Exception {
         ProcessBuilder builder = new ProcessBuilder(commandLine);
-        builder.directory(new File("."));
+        builder.directory(Paths.get(".").toFile());
         process = builder.start();
 
         errorWatcher = new StreamWatcher(process.getErrorStream());

Modified: tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/fs/FSBatchProcessCLI.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/fs/FSBatchProcessCLI.java?rev=1706060&r1=1706059&r2=1706060&view=diff
==============================================================================
--- tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/fs/FSBatchProcessCLI.java (original)
+++ tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/fs/FSBatchProcessCLI.java Wed Sep 30 15:59:57 2015
@@ -17,9 +17,10 @@ package org.apache.tika.batch.fs;
  * limitations under the License.
  */
 
-import java.io.File;
 import java.io.IOException;
 import java.net.URL;
+import java.nio.file.Path;
+import java.nio.file.Paths;
 import java.util.HashMap;
 import java.util.Map;
 import java.util.concurrent.ExecutorService;
@@ -68,7 +69,7 @@ public class FSBatchProcessCLI {
 
     private TikaInputStream getConfigInputStream(String[] args, boolean logDefault) throws IOException {
         TikaInputStream is = null;
-        File batchConfigFile = getConfigFile(args);
+        Path batchConfigFile = getConfigFile(args);
         if (batchConfigFile != null) {
             //this will throw IOException if it can't find a specified config file
             //better to throw an exception than silently back off to default.
@@ -131,12 +132,12 @@ public class FSBatchProcessCLI {
         System.exit(result.getExitStatus());
     }
 
-    private File getConfigFile(String[] args) {
-        File configFile = null;
+    private Path getConfigFile(String[] args) {
+        Path configFile = null;
         for (int i = 0; i < args.length; i++) {
             if (args[i].equals("-bc") || args[i].equals("-batch-config")) {
                 if (i < args.length-1) {
-                    configFile = new File(args[i+1]);
+                    configFile = Paths.get(args[i+1]);
                 }
             }
         }

Modified: tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/fs/FSDirectoryCrawler.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/fs/FSDirectoryCrawler.java?rev=1706060&r1=1706059&r2=1706060&view=diff
==============================================================================
--- tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/fs/FSDirectoryCrawler.java (original)
+++ tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/fs/FSDirectoryCrawler.java Wed Sep 30 15:59:57 2015
@@ -16,11 +16,15 @@ package org.apache.tika.batch.fs;
  * limitations under the License.
  */
 
-import java.io.File;
+import java.io.IOException;
+import java.nio.file.DirectoryStream;
+import java.nio.file.Files;
+import java.nio.file.Path;
 import java.util.ArrayList;
-import java.util.Arrays;
 import java.util.Collections;
 import java.util.Comparator;
+import java.util.Iterator;
+import java.util.LinkedList;
 import java.util.List;
 import java.util.concurrent.ArrayBlockingQueue;
 
@@ -36,33 +40,35 @@ public class FSDirectoryCrawler extends
         OS_ORDER //operating system chooses
     }
 
-    private final File root;
-    private final File startDirectory;
-    private final Comparator<File> fileComparator = new FileNameComparator();
+    private final Path root;
+    private final Path startDirectory;
+    private final Comparator<Path> pathComparator = new FileNameComparator();
     private CRAWL_ORDER crawlOrder;
 
     public FSDirectoryCrawler(ArrayBlockingQueue<FileResource> fileQueue,
-                              int numConsumers, File root, CRAWL_ORDER crawlOrder) {
+                              int numConsumers, Path root, CRAWL_ORDER crawlOrder) {
         super(fileQueue, numConsumers);
         this.root = root;
         this.startDirectory = root;
         this.crawlOrder = crawlOrder;
-        if (! startDirectory.isDirectory()) {
-            throw new RuntimeException("Crawler couldn't find this directory:" + startDirectory.getAbsolutePath());
+        if (!Files.isDirectory(startDirectory)) {
+            throw new RuntimeException("Crawler couldn't find this directory:" +
+                    startDirectory.toAbsolutePath());
         }
 
     }
 
     public FSDirectoryCrawler(ArrayBlockingQueue<FileResource> fileQueue,
-                              int numConsumers, File root, File startDirectory,
+                              int numConsumers, Path root, Path startDirectory,
                               CRAWL_ORDER crawlOrder) {
         super(fileQueue, numConsumers);
         this.root = root;
         this.startDirectory = startDirectory;
         this.crawlOrder = crawlOrder;
-        assert(FSUtil.checkThisIsAncestorOfOrSameAsThat(root, startDirectory));
-        if (! startDirectory.isDirectory()) {
-            throw new RuntimeException("Crawler couldn't find this directory:" + startDirectory.getAbsolutePath());
+        assert(startDirectory.toAbsolutePath().startsWith(root.toAbsolutePath()));
+
+        if (! Files.isDirectory(startDirectory)) {
+            throw new RuntimeException("Crawler couldn't find this directory:" + startDirectory.toAbsolutePath());
         }
     }
 
@@ -70,58 +76,63 @@ public class FSDirectoryCrawler extends
         addFiles(startDirectory);
     }
 
-    private void addFiles(File directory) throws InterruptedException {
+    private void addFiles(Path directory) throws InterruptedException {
 
-        if (directory == null ||
-                !directory.isDirectory() || !directory.canRead()) {
-            String path = "null path";
-            if (directory != null) {
-                path = directory.getAbsolutePath();
-            }
-            logger.warn("FSFileAdder can't read this directory: " + path);
+        if (directory == null) {
+            logger.warn("FSFileAdder asked to process null directory?!");
             return;
         }
 
-        List<File> directories = new ArrayList<File>();
-        File[] fileArr = directory.listFiles();
-        if (fileArr == null) {
-            logger.info("Empty directory: " + directory.getAbsolutePath());
+        List<Path> files = new ArrayList<>();
+        try (DirectoryStream ds = Files.newDirectoryStream(directory)){
+            Iterator<Path> it = ds.iterator();
+            while (it.hasNext()) {
+                files.add(it.next());
+            }
+        } catch (IOException e) {
+            logger.warn("FSFileAdder couldn't read "+directory.toAbsolutePath() +
+            ": "+e.getMessage());
+        }
+        if (files.size() == 0) {
+            logger.info("Empty directory: " + directory.toAbsolutePath());
             return;
         }
 
-        List<File> files = new ArrayList<File>(Arrays.asList(fileArr));
 
         if (crawlOrder == CRAWL_ORDER.RANDOM) {
             Collections.shuffle(files);
         } else if (crawlOrder == CRAWL_ORDER.SORTED) {
-            Collections.sort(files, fileComparator);
+            Collections.sort(files, pathComparator);
         }
 
         int numFiles = 0;
-        for (File f : files) {
+        List<Path> directories = new LinkedList<>();
+        for (Path f : files) {
             if (Thread.currentThread().isInterrupted()) {
                 throw new InterruptedException("file adder interrupted");
             }
-
-            if (f.isFile()) {
-                numFiles++;
-                if (numFiles == 1) {
-                    handleFirstFileInDirectory(f);
-                }
+            if (!Files.isReadable(f)) {
+                logger.warn("Skipping -- "+f.toAbsolutePath()+
+                        " -- file/directory is not readable");
+                continue;
             }
-            if (f.isDirectory()) {
+            if (Files.isDirectory(f)) {
                 directories.add(f);
                 continue;
             }
+            numFiles++;
+            if (numFiles == 1) {
+                handleFirstFileInDirectory(f);
+            }
             int added = tryToAdd(new FSFileResource(root, f));
             if (added == FileResourceCrawler.STOP_NOW) {
-                logger.debug("crawler has hit a limit: "+f.getAbsolutePath() + " : " + added);
+                logger.debug("crawler has hit a limit: "+f.toAbsolutePath() + " : " + added);
                 return;
             }
-            logger.debug("trying to add: "+f.getAbsolutePath() + " : " + added);
+            logger.debug("trying to add: "+f.toAbsolutePath() + " : " + added);
         }
 
-        for (File f : directories) {
+        for (Path f : directories) {
             addFiles(f);
         }
     }
@@ -135,21 +146,21 @@ public class FSDirectoryCrawler extends
      *
      * @param f file to handle
      */
-    public void handleFirstFileInDirectory(File f) {
+    public void handleFirstFileInDirectory(Path f) {
         //no-op
     }
 
     //simple lexical order for the file name, we don't really care about localization.
     //we do want this, though, because file.compareTo behaves differently
     //on different OS's.
-    private class FileNameComparator implements Comparator<File> {
+    private class FileNameComparator implements Comparator<Path> {
 
         @Override
-        public int compare(File f1, File f2) {
+        public int compare(Path f1, Path f2) {
             if (f1 == null || f2 == null) {
                 return 0;
             }
-            return f1.getName().compareTo(f2.getName());
+            return f1.getFileName().toString().compareTo(f2.getFileName().toString());
         }
     }
 }

Modified: tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/fs/FSFileResource.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/fs/FSFileResource.java?rev=1706060&r1=1706059&r2=1706060&view=diff
==============================================================================
--- tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/fs/FSFileResource.java (original)
+++ tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/fs/FSFileResource.java Wed Sep 30 15:59:57 2015
@@ -20,6 +20,9 @@ package org.apache.tika.batch.fs;
 import java.io.File;
 import java.io.IOException;
 import java.io.InputStream;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
 import java.util.Locale;
 
 import org.apache.tika.batch.FileResource;
@@ -39,21 +42,49 @@ import org.apache.tika.metadata.Metadata
  */
 public class FSFileResource implements FileResource {
 
-    private final File fullPath;
+    private final Path fullPath;
     private final String relativePath;
     private final Metadata metadata;
 
+    /**
+     *
+     * @param inputRoot
+     * @param fullPath
+     * @see FSFileResource#FSFileResource(Path, Path)
+     * @deprecated to be removed in Tika 2.0
+     */
+    @Deprecated
     public FSFileResource(File inputRoot, File fullPath) {
+        this(Paths.get(inputRoot.getAbsolutePath()),
+                Paths.get(fullPath.getAbsolutePath()));
+    }
+
+    /**
+     * Constructor
+     *
+     * @param inputRoot the input root for the file
+     * @param fullPath the full path to the file
+     * @throws IllegalArgumentException if the fullPath is not
+     * a child of inputRoot
+     */
+    public FSFileResource(Path inputRoot, Path fullPath) {
         this.fullPath = fullPath;
         this.metadata = new Metadata();
         //child path must actually be a child
-        assert(FSUtil.checkThisIsAncestorOfThat(inputRoot, fullPath));
-        this.relativePath = fullPath.getAbsolutePath().substring(inputRoot.getAbsolutePath().length()+1);
+        assert(fullPath.toAbsolutePath().startsWith(inputRoot.toAbsolutePath()));
+        this.relativePath = inputRoot.relativize(fullPath).toString();
 
         //need to set these now so that the filter can determine
         //whether or not to crawl this file
-        metadata.set(Metadata.RESOURCE_NAME_KEY, fullPath.getName());
-        metadata.set(Metadata.CONTENT_LENGTH, Long.toString(fullPath.length()));
+        metadata.set(Metadata.RESOURCE_NAME_KEY, fullPath.getFileName().toString());
+        long sz = -1;
+        try {
+            sz = Files.size(fullPath);
+        } catch (IOException e) {
+            //swallow
+            //a nonexistent file will be handled downstream
+        }
+        metadata.set(Metadata.CONTENT_LENGTH, Long.toString(sz));
         metadata.set(FSProperties.FS_REL_PATH, relativePath);
         metadata.set(FileResource.FILE_EXTENSION, getExtension(fullPath));
     }
@@ -67,8 +98,8 @@ public class FSFileResource implements F
      * @param fullPath full path from which to try to find an extension
      * @return the lowercased extension or an empty string
      */
-    private String getExtension(File fullPath) {
-        String p = fullPath.getName();
+    private String getExtension(Path fullPath) {
+        String p = fullPath.getFileName().toString();
         int i = p.lastIndexOf(".");
         if (i > -1) {
             return p.substring(i + 1).toLowerCase(Locale.ROOT);

Modified: tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/fs/FSListCrawler.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/fs/FSListCrawler.java?rev=1706060&r1=1706059&r2=1706060&view=diff
==============================================================================
--- tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/fs/FSListCrawler.java (original)
+++ tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/fs/FSListCrawler.java Wed Sep 30 15:59:57 2015
@@ -16,9 +16,6 @@ package org.apache.tika.batch.fs;
  * limitations under the License.
  */
 
-import org.apache.tika.batch.FileResource;
-import org.apache.tika.batch.FileResourceCrawler;
-
 import java.io.BufferedReader;
 import java.io.File;
 import java.io.FileInputStream;
@@ -26,23 +23,64 @@ import java.io.FileNotFoundException;
 import java.io.IOException;
 import java.io.InputStreamReader;
 import java.io.UnsupportedEncodingException;
+import java.nio.charset.Charset;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
 import java.util.concurrent.ArrayBlockingQueue;
 
+import org.apache.tika.batch.FileResource;
+import org.apache.tika.batch.FileResourceCrawler;
+
 /**
  * Class that "crawls" a list of files.
  */
 public class FSListCrawler extends FileResourceCrawler {
 
     private final BufferedReader reader;
-    private final File root;
+    private final Path root;
 
+    /**
+     *
+     * @param fileQueue
+     * @param numConsumers
+     * @param root
+     * @param list
+     * @param encoding
+     * @throws FileNotFoundException
+     * @throws UnsupportedEncodingException
+     * @deprecated
+     * @see #FSListCrawler(ArrayBlockingQueue, int, Path, Path, Charset)
+     */
+    @Deprecated
     public FSListCrawler(ArrayBlockingQueue<FileResource> fileQueue,
                          int numConsumers, File root, File list, String encoding)
             throws FileNotFoundException, UnsupportedEncodingException {
         super(fileQueue, numConsumers);
         reader = new BufferedReader(new InputStreamReader(new FileInputStream(list), encoding));
-        this.root = root;
+        this.root = Paths.get(root.toURI());
+
+    }
 
+    /**
+     * Constructor for a crawler that reads a list of files to process.
+     * <p>
+     * The list should be paths relative to the root.
+     *
+     * @param fileQueue queue for batch
+     * @param numConsumers number of consumers
+     * @param root root input director
+     * @param list text file list (one file per line) of paths relative to
+     *             the root for processing
+     * @param charset charset of the file
+     * @throws IOException
+     */
+    public FSListCrawler(ArrayBlockingQueue<FileResource> fileQueue,
+                         int numConsumers, Path root, Path list, Charset charset)
+            throws IOException {
+        super(fileQueue, numConsumers);
+        reader = Files.newBufferedReader(list, charset);
+        this.root = root;
     }
 
     public void start() throws InterruptedException {
@@ -52,14 +90,14 @@ public class FSListCrawler extends FileR
             if (Thread.currentThread().isInterrupted()) {
                 throw new InterruptedException("file adder interrupted");
             }
-            File f = new File(root, line);
-            if (! f.exists()) {
-                logger.warn("File doesn't exist:"+f.getAbsolutePath());
+            Path f = Paths.get(root.toString(), line);
+            if (! Files.exists(f)) {
+                logger.warn("File doesn't exist:"+f.toAbsolutePath());
                 line = nextLine();
                 continue;
             }
-            if (f.isDirectory()) {
-                logger.warn("File is a directory:"+f.getAbsolutePath());
+            if (Files.isDirectory(f)) {
+                logger.warn("File is a directory:"+f.toAbsolutePath());
                 line = nextLine();
                 continue;
             }

Modified: tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/fs/FSOutputStreamFactory.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/fs/FSOutputStreamFactory.java?rev=1706060&r1=1706059&r2=1706060&view=diff
==============================================================================
--- tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/fs/FSOutputStreamFactory.java (original)
+++ tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/fs/FSOutputStreamFactory.java Wed Sep 30 15:59:57 2015
@@ -16,10 +16,13 @@ package org.apache.tika.batch.fs;
  * limitations under the License.
  */
 
+import java.io.BufferedOutputStream;
 import java.io.File;
-import java.io.FileOutputStream;
 import java.io.IOException;
 import java.io.OutputStream;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
 import java.util.zip.GZIPOutputStream;
 
 import org.apache.commons.compress.archivers.zip.ZipArchiveOutputStream;
@@ -37,14 +40,28 @@ public class FSOutputStreamFactory imple
     }
 
     private final FSUtil.HANDLE_EXISTING handleExisting;
-    private final File outputRoot;
+    private final Path outputRoot;
     private final String suffix;
     private final COMPRESSION compression;
 
+    /**
+     *
+     * @param outputRoot
+     * @param handleExisting
+     * @param compression
+     * @param suffix
+     * @see #FSOutputStreamFactory(Path, FSUtil.HANDLE_EXISTING, COMPRESSION, String)
+     */
+    @Deprecated
     public FSOutputStreamFactory(File outputRoot, FSUtil.HANDLE_EXISTING handleExisting,
                                  COMPRESSION compression, String suffix) {
+        this(Paths.get(outputRoot.toURI()),
+                handleExisting, compression, suffix);
+    }
+    public FSOutputStreamFactory(Path outputRoot, FSUtil.HANDLE_EXISTING handleExisting,
+                                 COMPRESSION compression, String suffix) {
         this.handleExisting = handleExisting;
-        this.outputRoot = outputRoot.getAbsoluteFile();
+        this.outputRoot = outputRoot;
         this.suffix = suffix;
         this.compression = compression;
     }
@@ -68,27 +85,30 @@ public class FSOutputStreamFactory imple
     @Override
     public OutputStream getOutputStream(Metadata metadata) throws IOException {
         String initialRelativePath = metadata.get(FSProperties.FS_REL_PATH);
-        File outputFile = FSUtil.getOutputFile(outputRoot, initialRelativePath, handleExisting, suffix);
-        if (outputFile == null) {
+        Path outputPath = FSUtil.getOutputPath(outputRoot, initialRelativePath, handleExisting, suffix);
+        if (outputPath == null) {
             return null;
         }
-        if (! outputFile.getParentFile().isDirectory()) {
-            boolean success = outputFile.getParentFile().mkdirs();
-            //with multithreading, it is possible that the parent file was created between
-            //the test and the attempt to .mkdirs(); mkdirs() returns false if the dirs already exist
-            if (! success && ! outputFile.getParentFile().isDirectory()) {
-                throw new IOException("Couldn't create parent directory for:"+outputFile.getAbsolutePath());
+        if (!Files.isDirectory(outputPath.getParent())) {
+            Files.createDirectories(outputPath.getParent());
+            //TODO: shouldn't need this any more in java 7, right?
+            if (! Files.isDirectory(outputPath.getParent())) {
+                throw new IOException("Couldn't create parent directory for:"+outputPath.toAbsolutePath());
             }
         }
 
-        OutputStream os = new FileOutputStream(outputFile);
-        if (compression == COMPRESSION.BZIP2){
-            os = new BZip2CompressorOutputStream(os);
-        } else if (compression == COMPRESSION.GZIP) {
-            os = new GZIPOutputStream(os);
-        } else if (compression == COMPRESSION.ZIP) {
-            os = new ZipArchiveOutputStream(os);
+        OutputStream os = Files.newOutputStream(outputPath);
+        switch (compression) {
+            case BZIP2:
+                os = new BZip2CompressorOutputStream(os);
+                break;
+            case GZIP:
+                os = new GZIPOutputStream(os);
+                break;
+            case ZIP:
+                os = new ZipArchiveOutputStream(os);
+                break;
         }
-        return os;
+        return new BufferedOutputStream(os);
     }
 }

Modified: tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/fs/FSUtil.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/fs/FSUtil.java?rev=1706060&r1=1706059&r2=1706060&view=diff
==============================================================================
--- tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/fs/FSUtil.java (original)
+++ tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/fs/FSUtil.java Wed Sep 30 15:59:57 2015
@@ -19,6 +19,9 @@ package org.apache.tika.batch.fs;
 
 import java.io.File;
 import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
 import java.util.UUID;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
@@ -29,6 +32,7 @@ import java.util.regex.Pattern;
  */
 public class FSUtil {
 
+    @Deprecated
     public static boolean checkThisIsAncestorOfThat(File ancestor, File child) {
         int ancLen = ancestor.getAbsolutePath().length();
         int childLen = child.getAbsolutePath().length();
@@ -41,6 +45,7 @@ public class FSUtil {
 
     }
 
+    @Deprecated
     public static boolean checkThisIsAncestorOfOrSameAsThat(File ancestor, File child) {
         if (ancestor.equals(child)) {
             return true;
@@ -79,18 +84,57 @@ public class FSUtil {
      * after trying to increment the file count (e.g. fileA(2).docx) 10000 times
      * and then after trying 20,000 UUIDs.
      *
-     * @param outputRoot directory root for output
+     * @param outputRoot          directory root for output
      * @param initialRelativePath initial relative path (including file name, which may be renamed)
-     * @param handleExisting what to do if the output file exists
-     * @param suffix suffix to add to files, can be null
+     * @param handleExisting      what to do if the output file exists
+     * @param suffix              suffix to add to files, can be null
      * @return output file or null if no output file should be created
      * @throws java.io.IOException
+     * @see #getOutputPath(Path, String, HANDLE_EXISTING, String)
      */
+    @Deprecated
     public static File getOutputFile(File outputRoot, String initialRelativePath,
                                      HANDLE_EXISTING handleExisting, String suffix) throws IOException {
+        return getOutputPath(Paths.get(outputRoot.toURI()),
+                initialRelativePath, handleExisting, suffix).toFile();
+    }
+
+    /**
+     * Given an output root and an initial relative path,
+     * return the output file according to the HANDLE_EXISTING strategy
+     * <p/>
+     * In the most basic use case, given a root directory "input",
+     * a file's relative path "dir1/dir2/fileA.docx", and an output directory
+     * "output", the output file would be "output/dir1/dir2/fileA.docx."
+     * <p/>
+     * If HANDLE_EXISTING is set to OVERWRITE, this will not check to see if the output already exists,
+     * and the returned file could overwrite an existing file!!!
+     * <p/>
+     * If HANDLE_EXISTING is set to RENAME, this will try to increment a counter at the end of
+     * the file name (fileA(2).docx) until there is a file name that doesn't exist.
+     * <p/>
+     * This will return null if handleExisting == HANDLE_EXISTING.SKIP and
+     * the candidate file already exists.
+     * <p/>
+     * This will throw an IOException if HANDLE_EXISTING is set to
+     * RENAME, and a candidate output file cannot be found
+     * after trying to increment the file count (e.g. fileA(2).docx) 10000 times
+     * and then after trying 20,000 UUIDs.
+     *
+     * @param outputRoot          root directory into which to put the path
+     * @param initialRelativePath relative path including file ("somedir/subdir1/file.doc")
+     * @param handleExisting      policy for what to do if the output path already exists
+     * @param suffix              suffix to add to the output path, can be null
+     * @return output path, or null if no output path should be created
+     * @throws IOException
+     */
+    public static Path getOutputPath(Path outputRoot, String initialRelativePath,
+                                     HANDLE_EXISTING handleExisting, String suffix) throws IOException {
+
         String localSuffix = (suffix == null) ? "" : suffix;
-        File cand = new File(outputRoot, initialRelativePath+ "." +localSuffix);
-        if (cand.isFile()) {
+        Path cand = FSUtil.resolveRelative(outputRoot,
+                initialRelativePath + "." + localSuffix);
+        if (Files.exists(cand)) {
             if (handleExisting.equals(HANDLE_EXISTING.OVERWRITE)) {
                 return cand;
             } else if (handleExisting.equals(HANDLE_EXISTING.SKIP)) {
@@ -110,8 +154,9 @@ public class FSUtil {
         String fNameBase = null;
         String fNameExt = "";
         //this doesn't include the addition of the localSuffix
-        File candOnly = new File(outputRoot, initialRelativePath);
-        Matcher m = FILE_NAME_PATTERN.matcher(candOnly.getName());
+        Path candOnly = FSUtil.resolveRelative(outputRoot,
+                initialRelativePath);
+        Matcher m = FILE_NAME_PATTERN.matcher(candOnly.getFileName().toString());
         if (m.find()) {
             fNameBase = m.group(1);
 
@@ -127,23 +172,40 @@ public class FSUtil {
             }
         }
 
-        File outputParent = cand.getParentFile();
-        while (fNameBase != null && cand.isFile() && ++cnt < 10000) {
-            String candFileName = fNameBase + "(" + cnt + ")." + fNameExt+ "" +localSuffix;
-            cand = new File(outputParent, candFileName);
+        Path outputParent = cand.getParent();
+        while (fNameBase != null && Files.exists(cand) && ++cnt < 10000) {
+            String candFileName = fNameBase + "(" + cnt + ")." + fNameExt + "" + localSuffix;
+            cand = FSUtil.resolveRelative(outputParent, candFileName);
         }
         //reset count to 0 and try 20000 times
         cnt = 0;
-        while (cand.isFile() && cnt++ < 20000) {
+        while (Files.exists(cand) && cnt++ < 20000) {
             UUID uid = UUID.randomUUID();
-            cand = new File(outputParent, uid.toString() + fNameExt+ "" +localSuffix);
+            cand = FSUtil.resolveRelative(outputParent,
+                    uid.toString() + fNameExt + "" + localSuffix);
         }
 
-        if (cand.isFile()) {
+        if (Files.exists(cand)) {
             throw new IOException("Couldn't find candidate output file after trying " +
                     "very, very hard");
         }
         return cand;
     }
 
+    /**
+     * Resolves "other" against p after ensuring that "other" is not an absolute
+     * path; one could imagine malicious use of an absolute path here.
+     *
+     * @param p     base path that "other" is resolved against
+     * @param other path string to resolve; must not be absolute (".." segments are not checked)
+     * @return resolved path
+     * @throws IllegalArgumentException if "other" is an absolute path
+     */
+    public static Path resolveRelative(Path p, String other) {
+        Path op = Paths.get(other);
+        if (op.isAbsolute()) {
+            throw new IllegalArgumentException(other + " cannot be an absolute path!");
+        }
+        return p.resolve(op);
+    }
 }
\ No newline at end of file

Modified: tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/fs/builders/BasicTikaFSConsumersBuilder.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/fs/builders/BasicTikaFSConsumersBuilder.java?rev=1706060&r1=1706059&r2=1706060&view=diff
==============================================================================
--- tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/fs/builders/BasicTikaFSConsumersBuilder.java (original)
+++ tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/fs/builders/BasicTikaFSConsumersBuilder.java Wed Sep 30 15:59:57 2015
@@ -17,7 +17,10 @@
 
 package org.apache.tika.batch.fs.builders;
 
-import java.io.File;
+import java.io.InputStream;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
 import java.util.LinkedList;
 import java.util.List;
 import java.util.Map;
@@ -86,8 +89,8 @@ public class BasicTikaFSConsumersBuilder
             }
         }
         if (tikaConfigPath != null) {
-            try {
-                config = new TikaConfig(new File(tikaConfigPath));
+            try (InputStream is = Files.newInputStream(Paths.get(tikaConfigPath))) {
+                config = new TikaConfig(is);
             } catch (Exception e) {
                 throw new RuntimeException(e);
             }
@@ -166,7 +169,7 @@ public class BasicTikaFSConsumersBuilder
     private OutputStreamFactory getOutputStreamFactory(Node node, Map<String, String> runtimeAttributes) {
         Map<String, String> attrs = XMLDOMUtil.mapifyAttrs(node, runtimeAttributes);
 
-        File outputDir = PropsUtil.getFile(attrs.get("outputDir"), null);
+        Path outputDir = PropsUtil.getPath(attrs.get("outputDir"), null);
 /*        FSUtil.HANDLE_EXISTING handleExisting = null;
         String handleExistingString = attrs.get("handleExisting");
         if (handleExistingString == null) {
@@ -194,7 +197,7 @@ public class BasicTikaFSConsumersBuilder
         }
         String suffix = attrs.get("outputSuffix");
 
-        //TODO: possibly open up the different handle existings in the future
+        //TODO: possibly open up the different handle-existings in the future
         //but for now, lock it down to require skip.  Too dangerous otherwise
         //if the driver restarts and this is set to overwrite...
         return new FSOutputStreamFactory(outputDir, FSUtil.HANDLE_EXISTING.SKIP,

Modified: tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/fs/builders/FSCrawlerBuilder.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/fs/builders/FSCrawlerBuilder.java?rev=1706060&r1=1706059&r2=1706060&view=diff
==============================================================================
--- tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/fs/builders/FSCrawlerBuilder.java (original)
+++ tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/fs/builders/FSCrawlerBuilder.java Wed Sep 30 15:59:57 2015
@@ -18,7 +18,12 @@ package org.apache.tika.batch.fs.builder
  */
 
 
-import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.io.UnsupportedEncodingException;
+import java.nio.charset.Charset;
+import java.nio.file.Path;
+import java.nio.file.Paths;
 import java.util.Locale;
 import java.util.Map;
 import java.util.concurrent.ArrayBlockingQueue;
@@ -30,6 +35,7 @@ import org.apache.tika.batch.builders.Ba
 import org.apache.tika.batch.builders.ICrawlerBuilder;
 import org.apache.tika.batch.fs.FSDirectoryCrawler;
 import org.apache.tika.batch.fs.FSDocumentSelector;
+import org.apache.tika.batch.fs.FSListCrawler;
 import org.apache.tika.extractor.DocumentSelector;
 import org.apache.tika.util.PropsUtil;
 import org.apache.tika.util.XMLDOMUtil;
@@ -62,7 +68,8 @@ public class FSCrawlerBuilder implements
         Map<String, String> attributes = XMLDOMUtil.mapifyAttrs(node, runtimeAttributes);
 
         int numConsumers = BatchProcessBuilder.getNumConsumers(runtimeAttributes);
-        File inputDir = PropsUtil.getFile(attributes.get(INPUT_DIR_ATTR), new File("input"));
+        Path inputDir = PropsUtil.getPath(attributes.get(INPUT_DIR_ATTR),
+                Paths.get("input"));
         FileResourceCrawler crawler = null;
         if (attributes.containsKey("fileList")) {
             String randomCrawlString = attributes.get(CRAWL_ORDER);
@@ -71,18 +78,23 @@ public class FSCrawlerBuilder implements
                 //TODO: change to logger warn or throw RuntimeException?
                 System.err.println("randomCrawl attribute is ignored by FSListCrawler");
             }
-            File fileList = PropsUtil.getFile(attributes.get("fileList"), null);
-            String encoding = PropsUtil.getString(attributes.get("fileListEncoding"), "UTF-8");
+            Path fileList = PropsUtil.getPath(attributes.get("fileList"), null);
+            String encodingString = PropsUtil.getString(attributes.get("fileListEncoding"), "UTF-8");
+
             try {
-                crawler = new org.apache.tika.batch.fs.FSListCrawler(queue, numConsumers, inputDir, fileList, encoding);
-            } catch (java.io.FileNotFoundException e) {
-                throw new RuntimeException("fileList file not found for FSListCrawler: " + fileList.getAbsolutePath());
-            } catch (java.io.UnsupportedEncodingException e) {
-                throw new RuntimeException("fileList encoding not supported: "+encoding);
+                Charset encoding = Charset.forName(encodingString);
+                crawler = new FSListCrawler(queue, numConsumers, inputDir, fileList, encoding);
+            } catch (FileNotFoundException e) {
+                throw new RuntimeException("fileList file not found for FSListCrawler: " +
+                        fileList.toAbsolutePath());
+            } catch (UnsupportedEncodingException e) {
+                throw new RuntimeException("fileList encoding not supported: "+encodingString);
+            } catch (IOException e) {
+                throw new RuntimeException("IOException while trying to open fileList: " + e.getMessage());
             }
         } else {
             FSDirectoryCrawler.CRAWL_ORDER crawlOrder = getCrawlOrder(attributes.get(CRAWL_ORDER));
-            File startDir = PropsUtil.getFile(attributes.get(INPUT_START_DIR_ATTR), null);
+            Path startDir = PropsUtil.getPath(attributes.get(INPUT_START_DIR_ATTR), null);
             if (startDir == null) {
                 crawler = new FSDirectoryCrawler(queue, numConsumers, inputDir, crawlOrder);
             } else {

Modified: tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/fs/strawman/StrawManTikaAppDriver.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/fs/strawman/StrawManTikaAppDriver.java?rev=1706060&r1=1706059&r2=1706060&view=diff
==============================================================================
--- tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/fs/strawman/StrawManTikaAppDriver.java (original)
+++ tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/fs/strawman/StrawManTikaAppDriver.java Wed Sep 30 15:59:57 2015
@@ -17,11 +17,15 @@ package org.apache.tika.batch.fs.strawma
  * limitations under the License.
  */
 
-import java.io.File;
-import java.io.FileOutputStream;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.OutputStream;
+import java.nio.file.FileVisitResult;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.nio.file.SimpleFileVisitor;
+import java.nio.file.attribute.BasicFileAttributes;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Date;
@@ -53,111 +57,107 @@ public class StrawManTikaAppDriver imple
     private static AtomicInteger threadCount = new AtomicInteger(0);
     private final int totalThreads;
     private final int threadNum;
-    private int rootLen = -1;
-    private File inputDir = null;
-    private File outputDir = null;
+    private Path inputRoot = null;
+    private Path outputRoot = null;
     private String[] args = null;
     private Logger logger = LoggerFactory.getLogger(StrawManTikaAppDriver.class);
 
 
-    public StrawManTikaAppDriver(File inputDir, File outputDir, int totalThreads, String[] args) {
-        rootLen = inputDir.getAbsolutePath().length()+1;
-        this.inputDir = inputDir;
-        this.outputDir = outputDir;
+    public StrawManTikaAppDriver(Path inputRoot, Path outputRoot,
+                                 int totalThreads, String[] args) {
+        this.inputRoot = inputRoot;
+        this.outputRoot = outputRoot;
         this.args = args;
         threadNum = threadCount.getAndIncrement();
         this.totalThreads = totalThreads;
     }
 
 
-    private int processDirectory(File inputDir) {
-        int processed = 0;
-        if (inputDir == null || inputDir.listFiles() == null) {
-            return processed;
-        }
-        for (File f : inputDir.listFiles()) {
-            List<File> childDirs = new ArrayList<File>();
-            if (f.isDirectory()) {
-                childDirs.add(f);
-            } else {
-                processed += processFile(f);
-            }
-            for (File dir : childDirs) {
-                processed += processDirectory(dir);
+    private class TikaVisitor extends SimpleFileVisitor<Path> {
+        private int processed = 0;
 
-            }
+        int getProcessed() {
+            return processed;
         }
-        return processed;
-    }
-
-    private int processFile(File f) {
-        if (totalThreads > 1) {
-            int hashCode = f.getAbsolutePath().hashCode();
-            if (Math.abs(hashCode % totalThreads) != threadNum) {
-                return 0;
+        @Override
+        public FileVisitResult visitFile(Path file,
+                                         BasicFileAttributes attr) {
+            if (totalThreads > 1) {
+                int hashCode = file.toAbsolutePath().toString().hashCode();
+                if (Math.abs(hashCode % totalThreads) != threadNum) {
+                    return FileVisitResult.CONTINUE;
+                }
             }
-        }
-        File outputFile = new File(outputDir, f.getAbsolutePath().substring(rootLen)+".txt");
-        outputFile.getAbsoluteFile().getParentFile().mkdirs();
-        if (! outputFile.getParentFile().exists()) {
-            logger.error(MarkerFactory.getMarker("FATAL"),
-                    "parent directory for "+ outputFile + " was not made!");
-            throw new RuntimeException("couldn't make parent file for " + outputFile);
-        }
-        List<String> commandLine = new ArrayList<String>();
-        for (String arg : args) {
-            commandLine.add(arg);
-        }
-        commandLine.add("-t");
-        commandLine.add("\""+f.getAbsolutePath()+"\"");
-        ProcessBuilder builder = new ProcessBuilder(commandLine.toArray(new String[commandLine.size()]));
-        logger.info("about to process: "+f.getAbsolutePath());
-        Process proc = null;
-        RedirectGobbler gobbler = null;
-        Thread gobblerThread = null;
-        try {
-            OutputStream os = new FileOutputStream(outputFile);
-            proc = builder.start();
-            gobbler = new RedirectGobbler(proc.getInputStream(), os);
-            gobblerThread = new Thread(gobbler);
-            gobblerThread.start();
-        } catch (IOException e) {
-            logger.error(e.getMessage());
-            return 0;
-        }
-
-        boolean finished = false;
-        long totalTime = 180000;//3 minutes
-        long pulse = 100;
-        for (int i = 0; i < totalTime; i += pulse) {
+            assert(file.startsWith(inputRoot));
+            Path relPath = inputRoot.relativize(file);
+            Path outputFile = Paths.get(outputRoot.toAbsolutePath().toString(),
+                    relPath.toString() + ".txt");
             try {
-                Thread.currentThread().sleep(pulse);
-            } catch (InterruptedException e) {
-                //swallow
-            }
+                Files.createDirectories(outputFile.getParent());
+            } catch (IOException e) {
+                logger.error(MarkerFactory.getMarker("FATAL"),
+                        "parent directory for "+ outputFile + " was not made!");
+                throw new RuntimeException("couldn't make parent file for " + outputFile);
+            }
+            List<String> commandLine = new ArrayList<>();
+            for (String arg : args) {
+                commandLine.add(arg);
+            }
+            commandLine.add("-t");
+            commandLine.add("\""+file.toAbsolutePath()+"\""); //the input file to parse, not the output file
+            ProcessBuilder builder = new ProcessBuilder(commandLine.toArray(new String[commandLine.size()]));
+            logger.info("about to process: "+file.toAbsolutePath());
+            Process proc = null;
+            RedirectGobbler gobbler = null;
+            Thread gobblerThread = null;
             try {
-                int exit = proc.exitValue();
-                finished = true;
-                break;
-            } catch (IllegalThreadStateException e) {
-                //swallow
+                OutputStream os = Files.newOutputStream(outputFile);
+                proc = builder.start();
+                gobbler = new RedirectGobbler(proc.getInputStream(), os);
+                gobblerThread = new Thread(gobbler);
+                gobblerThread.start();
+            } catch (IOException e) {
+                logger.error(e.getMessage());
+                return FileVisitResult.CONTINUE;
+            }
+
+            boolean finished = false;
+            long totalTime = 180000;//3 minutes
+            long pulse = 100;
+            for (int i = 0; i < totalTime; i += pulse) {
+                try {
+                    Thread.currentThread().sleep(pulse);
+                } catch (InterruptedException e) {
+                    //swallow
+                }
+                try {
+                    int exit = proc.exitValue();
+                    finished = true;
+                    break;
+                } catch (IllegalThreadStateException e) {
+                    //swallow
+                }
             }
+            if (!finished) {
+                logger.warn("Had to kill process working on: " + file.toAbsolutePath());
+                proc.destroy();
+            }
+            gobbler.close();
+            gobblerThread.interrupt();
+            processed++;
+            return FileVisitResult.CONTINUE;
         }
-        if (!finished) {
-            logger.warn("Had to kill process working on: " + f.getAbsolutePath());
-            proc.destroy();
-        }
-        gobbler.close();
-        gobblerThread.interrupt();
-        return 1;
+
     }
 
 
+
     @Override
     public Integer call() throws Exception {
         long start = new Date().getTime();
-
-        int processed = processDirectory(inputDir);
+        TikaVisitor v = new TikaVisitor();
+        Files.walkFileTree(inputRoot, v);
+        int processed = v.getProcessed();
         double elapsedSecs = ((double)new Date().getTime()-(double)start)/(double)1000;
         logger.info("Finished processing " + processed + " files in " + elapsedSecs + " seconds.");
         return processed;
@@ -202,6 +202,8 @@ public class StrawManTikaAppDriver imple
         }
     }
 
+
+
     public static String usage() {
         StringBuilder sb = new StringBuilder();
         sb.append("Example usage:\n");
@@ -216,8 +218,8 @@ public class StrawManTikaAppDriver imple
         if (args.length < 6) {
             System.err.println(StrawManTikaAppDriver.usage());
         }
-        File inputDir = new File(args[0]);
-        File outputDir = new File(args[1]);
+        Path inputDir = Paths.get(args[0]);
+        Path outputDir = Paths.get(args[1]);
         int totalThreads = Integer.parseInt(args[2]);
 
         List<String> commandLine = new ArrayList<String>();

Modified: tika/trunk/tika-batch/src/main/java/org/apache/tika/util/PropsUtil.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-batch/src/main/java/org/apache/tika/util/PropsUtil.java?rev=1706060&r1=1706059&r2=1706060&view=diff
==============================================================================
--- tika/trunk/tika-batch/src/main/java/org/apache/tika/util/PropsUtil.java (original)
+++ tika/trunk/tika-batch/src/main/java/org/apache/tika/util/PropsUtil.java Wed Sep 30 15:59:57 2015
@@ -18,6 +18,8 @@ package org.apache.tika.util;
  */
 
 import java.io.File;
+import java.nio.file.Path;
+import java.nio.file.Paths;
 import java.util.Locale;
 
 /**
@@ -91,7 +93,9 @@ public class PropsUtil {
      * @param v string to parse
      * @param defaultMissing value to return if value is null or unparseable
      * @return parsed value
+     * @see #getPath(String, Path)
      */
+    @Deprecated
     public static File getFile(String v, File defaultMissing) {
         if (v == null || v.length() == 0) {
             return defaultMissing;
@@ -120,4 +124,26 @@ public class PropsUtil {
         }
         return v;
     }
+
+    /**
+     * Converts v to a Path.  If v is null or empty, this returns defaultMissing.
+     *
+     * @param v string to convert; one leading and one trailing " are stripped first
+     * @param defaultMissing value to return if v is null or empty
+     * @return path for v, or defaultMissing
+     * @see #getFile(String, File)
+     */
+    public static Path getPath(String v, Path defaultMissing) {
+        if (v == null || v.length() == 0) {
+            return defaultMissing;
+        }
+        //strip one leading and one trailing double quote, if present
+        if (v.startsWith("\"")) {
+            v = v.substring(1);
+        }
+        if (v.endsWith("\"")) {
+            v = v.substring(0, v.length()-1);
+        }
+        return Paths.get(v);
+    }
 }

Modified: tika/trunk/tika-batch/src/test/java/org/apache/tika/batch/fs/BatchDriverTest.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-batch/src/test/java/org/apache/tika/batch/fs/BatchDriverTest.java?rev=1706060&r1=1706059&r2=1706060&view=diff
==============================================================================
--- tika/trunk/tika-batch/src/test/java/org/apache/tika/batch/fs/BatchDriverTest.java (original)
+++ tika/trunk/tika-batch/src/test/java/org/apache/tika/batch/fs/BatchDriverTest.java Wed Sep 30 15:59:57 2015
@@ -20,15 +20,14 @@ package org.apache.tika.batch.fs;
 import static java.nio.charset.StandardCharsets.UTF_8;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertFalse;
-import static org.junit.Assert.assertNotNull;
 import static org.junit.Assert.assertTrue;
 
-import java.io.File;
+import java.nio.file.Files;
+import java.nio.file.Path;
 import java.util.Arrays;
 import java.util.HashMap;
 import java.util.Map;
 
-import org.apache.commons.io.FileUtils;
 import org.apache.tika.batch.BatchProcessDriverCLI;
 import org.junit.Test;
 
@@ -41,28 +40,30 @@ public class BatchDriverTest extends FSB
     @Test(timeout = 15000)
     public void oneHeavyHangTest() throws Exception {
         //batch runner hits one heavy hang file, keep going
-        File outputDir = getNewOutputDir("daemon-");
-        assertNotNull(outputDir.listFiles());
+        Path outputDir = getNewOutputDir("daemon-");
+        assertTrue(Files.isDirectory(outputDir));
         //make sure output directory is empty!
-        assertEquals(0, outputDir.listFiles().length);
+        assertEquals(0, countChildren(outputDir));
 
         String[] args = getDefaultCommandLineArgsArr("one_heavy_hang", outputDir, null);
         BatchProcessDriverCLI driver = getNewDriver("/tika-batch-config-test.xml", args);
         driver.execute();
+
         assertEquals(0, driver.getNumRestarts());
         assertFalse(driver.getUserInterrupted());
-        assertEquals(5, outputDir.listFiles().length);
+        assertEquals(5, countChildren(outputDir));
+
         assertContains("first test file",
-                FileUtils.readFileToString(new File(outputDir, "test2_ok.xml.xml"), UTF_8));
+                readFileToString(outputDir.resolve("test2_ok.xml.xml"), UTF_8));
     }
 
     @Test(timeout = 30000)
     public void restartOnFullHangTest() throws Exception {
         //batch runner hits more heavy hangs than threads; needs to restart
-        File outputDir = getNewOutputDir("daemon-");
+        Path outputDir = getNewOutputDir("daemon-");
 
         //make sure output directory is empty!
-        assertEquals(0, outputDir.listFiles().length);
+        assertEquals(0, countChildren(outputDir));
 
         String[] args = getDefaultCommandLineArgsArr("heavy_heavy_hangs", outputDir, null);
         BatchProcessDriverCLI driver = getNewDriver("/tika-batch-config-test.xml", args);
@@ -71,15 +72,15 @@ public class BatchDriverTest extends FSB
         assertTrue(driver.getNumRestarts() > 0);
         assertFalse(driver.getUserInterrupted());
         assertContains("first test file",
-                FileUtils.readFileToString(new File(outputDir, "test6_ok.xml.xml"), UTF_8));
+                readFileToString(outputDir.resolve("test6_ok.xml.xml"), UTF_8));
     }
 
     @Test(timeout = 15000)
     public void noRestartTest() throws Exception {
-        File outputDir = getNewOutputDir("daemon-");
+        Path outputDir = getNewOutputDir("daemon-");
 
         //make sure output directory is empty!
-        assertEquals(0, outputDir.listFiles().length);
+        assertEquals(0, countChildren(outputDir));
 
         String[] args = getDefaultCommandLineArgsArr("no_restart", outputDir, null);
         String[] mod = Arrays.copyOf(args, args.length + 2);
@@ -90,22 +91,20 @@ public class BatchDriverTest extends FSB
         driver.execute();
         assertEquals(0, driver.getNumRestarts());
         assertFalse(driver.getUserInterrupted());
-        File[] files = outputDir.listFiles();
-        assertEquals(2, files.length);
-        File test2 = new File(outputDir, "test2_norestart.xml.xml");
-        assertTrue("test2_norestart.xml", test2.exists());
-        File test3 = new File(outputDir, "test3_ok.xml.xml");
-        assertFalse("test3_ok.xml", test3.exists());
-        assertEquals(0, test3.length());
+        assertEquals(2, countChildren(outputDir));
+        Path test2 = outputDir.resolve("test2_norestart.xml.xml");
+        assertTrue("test2_norestart.xml", Files.exists(test2));
+        Path test3 = outputDir.resolve("test3_ok.xml.xml");
+        assertFalse("test3_ok.xml", Files.exists(test3));
     }
 
     @Test(timeout = 15000)
     public void restartOnOOMTest() throws Exception {
         //batch runner hits more heavy hangs than threads; needs to restart
-        File outputDir = getNewOutputDir("daemon-");
+        Path outputDir = getNewOutputDir("daemon-");
 
         //make sure output directory is empty!
-        assertEquals(0, outputDir.listFiles().length);
+        assertEquals(0, countChildren(outputDir));
 
         String[] args = getDefaultCommandLineArgsArr("oom", outputDir, null);
         BatchProcessDriverCLI driver = getNewDriver("/tika-batch-config-test.xml", args);
@@ -113,7 +112,7 @@ public class BatchDriverTest extends FSB
         assertEquals(1, driver.getNumRestarts());
         assertFalse(driver.getUserInterrupted());
         assertContains("first test file",
-                FileUtils.readFileToString(new File(outputDir, "test2_ok.xml.xml"), UTF_8));
+                readFileToString(outputDir.resolve("test2_ok.xml.xml"), UTF_8));
     }
 
     @Test(timeout = 30000)
@@ -121,8 +120,8 @@ public class BatchDriverTest extends FSB
         //this tests that if all consumers are hung and the crawler is
         //waiting to add to the queue, there isn't deadlock.  The BatchProcess should
         //just shutdown, and the driver should restart
-        File outputDir = getNewOutputDir("allHeavyHangsStarvedCrawler-");
-        Map<String, String> args = new HashMap<String,String>();
+        Path outputDir = getNewOutputDir("allHeavyHangsStarvedCrawler-");
+        Map<String, String> args = new HashMap<>();
         args.put("-numConsumers", "2");
         args.put("-maxQueueSize", "2");
         String[] commandLine = getDefaultCommandLineArgsArr("heavy_heavy_hangs", outputDir, args);
@@ -131,7 +130,7 @@ public class BatchDriverTest extends FSB
         assertEquals(3, driver.getNumRestarts());
         assertFalse(driver.getUserInterrupted());
         assertContains("first test file",
-                FileUtils.readFileToString(new File(outputDir, "test6_ok.xml.xml"), UTF_8));
+                readFileToString(outputDir.resolve("test6_ok.xml.xml"), UTF_8));
     }
 
     @Test(timeout = 30000)
@@ -140,8 +139,8 @@ public class BatchDriverTest extends FSB
         //if -maxRestarts is not correctly removed from the commandline,
         //FSBatchProcessCLI's cli parser will throw an Unrecognized option exception
 
-        File outputDir = getNewOutputDir("allHeavyHangsStarvedCrawler-");
-        Map<String, String> args = new HashMap<String,String>();
+        Path outputDir = getNewOutputDir("allHeavyHangsStarvedCrawler-");
+        Map<String, String> args = new HashMap<>();
         args.put("-numConsumers", "1");
         args.put("-maxQueueSize", "10");
         args.put("-maxRestarts", "2");
@@ -152,14 +151,14 @@ public class BatchDriverTest extends FSB
         driver.execute();
         assertEquals(2, driver.getNumRestarts());
         assertFalse(driver.getUserInterrupted());
-        assertEquals(3, outputDir.listFiles().length);
+        assertEquals(3, countChildren(outputDir));
     }
 
     @Test(timeout = 30000)
     public void maxRestartsBadParameter() throws Exception {
         //tests that maxRestarts must be followed by an Integer
-        File outputDir = getNewOutputDir("allHeavyHangsStarvedCrawler-");
-        Map<String, String> args = new HashMap<String,String>();
+        Path outputDir = getNewOutputDir("allHeavyHangsStarvedCrawler-");
+        Map<String, String> args = new HashMap<>();
         args.put("-numConsumers", "1");
         args.put("-maxQueueSize", "10");
         args.put("-maxRestarts", "zebra");
@@ -180,22 +179,22 @@ public class BatchDriverTest extends FSB
         //tests that if something goes horribly wrong with FSBatchProcessCLI
         //the driver will not restart it again and again
         //this calls a bad xml file which should trigger a no restart exit.
-        File outputDir = getNewOutputDir("nostart-norestart-");
-        Map<String, String> args = new HashMap<String,String>();
+        Path outputDir = getNewOutputDir("nostart-norestart-");
+        Map<String, String> args = new HashMap<>();
         args.put("-numConsumers", "1");
         args.put("-maxQueueSize", "10");
 
         String[] commandLine = getDefaultCommandLineArgsArr("basic", outputDir, args);
         BatchProcessDriverCLI driver = getNewDriver("/tika-batch-config-broken.xml", commandLine);
         driver.execute();
-        assertEquals(0, outputDir.listFiles().length);
+        assertEquals(0, countChildren(outputDir));
         assertEquals(0, driver.getNumRestarts());
     }
 
     @Test(timeout = 30000)
     public void testNoRestartIfProcessFailsTake2() throws Exception {
-        File outputDir = getNewOutputDir("nostart-norestart-");
-        Map<String, String> args = new HashMap<String,String>();
+        Path outputDir = getNewOutputDir("nostart-norestart-");
+        Map<String, String> args = new HashMap<>();
         args.put("-numConsumers", "1");
         args.put("-maxQueueSize", "10");
         args.put("-somethingOrOther", "I don't Know");
@@ -203,7 +202,7 @@ public class BatchDriverTest extends FSB
         String[] commandLine = getDefaultCommandLineArgsArr("basic", outputDir, args);
         BatchProcessDriverCLI driver = getNewDriver("/tika-batch-config-test.xml", commandLine);
         driver.execute();
-        assertEquals(0, outputDir.listFiles().length);
+        assertEquals(0, countChildren(outputDir));
         assertEquals(0, driver.getNumRestarts());
     }
 

Modified: tika/trunk/tika-batch/src/test/java/org/apache/tika/batch/fs/BatchProcessTest.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-batch/src/test/java/org/apache/tika/batch/fs/BatchProcessTest.java?rev=1706060&r1=1706059&r2=1706060&view=diff
==============================================================================
--- tika/trunk/tika-batch/src/test/java/org/apache/tika/batch/fs/BatchProcessTest.java (original)
+++ tika/trunk/tika-batch/src/test/java/org/apache/tika/batch/fs/BatchProcessTest.java Wed Sep 30 15:59:57 2015
@@ -23,11 +23,13 @@ import static org.junit.Assert.assertFal
 import static org.junit.Assert.assertTrue;
 import static org.junit.Assert.fail;
 
-import java.io.File;
 import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.util.List;
 import java.util.Map;
 
-import org.apache.commons.io.FileUtils;
 import org.apache.tika.batch.BatchProcess;
 import org.apache.tika.batch.BatchProcessDriverCLI;
 import org.junit.Test;
@@ -37,15 +39,15 @@ public class BatchProcessTest extends FS
     @Test(timeout = 15000)
     public void oneHeavyHangTest() throws Exception {
 
-        File outputDir = getNewOutputDir("one_heavy_hang-");
+        Path outputDir = getNewOutputDir("one_heavy_hang-");
 
         Map<String, String> args = getDefaultArgs("one_heavy_hang", outputDir);
         BatchProcessTestExecutor ex = new BatchProcessTestExecutor(args);
         StreamStrings streamStrings = ex.execute();
-        assertEquals(5, outputDir.listFiles().length);
-        File hvyHang = new File(outputDir, "test0_heavy_hang.xml.xml");
-        assertTrue(hvyHang.exists());
-        assertEquals(0, hvyHang.length());
+        assertEquals(5, countChildren(outputDir));
+        Path hvyHang = outputDir.resolve("test0_heavy_hang.xml.xml");
+        assertTrue(Files.exists(hvyHang));
+        assertEquals(0, Files.size(hvyHang));
         assertNotContained(BatchProcess.BATCH_CONSTANTS.BATCH_PROCESS_FATAL_MUST_RESTART.toString(),
                 streamStrings.getErrString());
     }
@@ -55,16 +57,17 @@ public class BatchProcessTest extends FS
     public void allHeavyHangsTest() throws Exception {
         //each of the three threads hits a heavy hang.  The BatchProcess runs into
         //all timedouts and shuts down.
-        File outputDir = getNewOutputDir("allHeavyHangs-");
+        Path outputDir = getNewOutputDir("allHeavyHangs-");
         Map<String, String> args = getDefaultArgs("heavy_heavy_hangs", outputDir);
         BatchProcessTestExecutor ex = new BatchProcessTestExecutor(args);
         StreamStrings streamStrings = ex.execute();
 
-        assertEquals(3, outputDir.listFiles().length);
-        for (File hvyHang : outputDir.listFiles()){
-            assertTrue(hvyHang.exists());
-            assertEquals("file length for "+hvyHang.getName()+" should be 0, but is: " +hvyHang.length(),
-                    0, hvyHang.length());
+        assertEquals(3, countChildren(outputDir));
+        for (Path hvyHang : listPaths(outputDir)){
+            assertTrue(Files.exists(hvyHang));
+            assertEquals("file length for "+hvyHang.getFileName()+" should be 0, but is: " +
+                            Files.size(hvyHang),
+                    0, Files.size(hvyHang));
         }
         assertContains(BatchProcess.BATCH_CONSTANTS.BATCH_PROCESS_FATAL_MUST_RESTART.toString(),
                 streamStrings.getErrString());
@@ -72,20 +75,20 @@ public class BatchProcessTest extends FS
 
     @Test(timeout = 30000)
     public void allHeavyHangsTestWithCrazyNumberConsumersTest() throws Exception {
-        File outputDir = getNewOutputDir("allHeavyHangsCrazyNumberConsumers-");
+        Path outputDir = getNewOutputDir("allHeavyHangsCrazyNumberConsumers-");
         Map<String, String> args = getDefaultArgs("heavy_heavy_hangs", outputDir);
         args.put("numConsumers", "100");
         BatchProcessTestExecutor ex = new BatchProcessTestExecutor(args);
         StreamStrings streamStrings = ex.execute();
-        assertEquals(7, outputDir.listFiles().length);
+        assertEquals(7, countChildren(outputDir));
 
         for (int i = 0; i < 6; i++){
-            File hvyHang = new File(outputDir, "test"+i+"_heavy_hang.xml.xml");
-            assertTrue(hvyHang.exists());
-            assertEquals(0, hvyHang.length());
+            Path hvyHang = outputDir.resolve("test"+i+"_heavy_hang.xml.xml");
+            assertTrue(Files.exists(hvyHang));
+            assertEquals(0, Files.size(hvyHang));
         }
         assertContains("This is tika-batch's first test file",
-                FileUtils.readFileToString(new File(outputDir, "test6_ok.xml.xml"), UTF_8));
+                readFileToString(outputDir.resolve("test6_ok.xml.xml"), UTF_8));
 
         //key that the process realize that there were no more processable files
         //in the queue and does not ask for a restart!
@@ -98,19 +101,19 @@ public class BatchProcessTest extends FS
         //this tests that if all consumers are hung and the crawler is
         //waiting to add to the queue, there isn't deadlock.  The batchrunner should
         //shutdown and ask to be restarted.
-        File outputDir = getNewOutputDir("allHeavyHangsStarvedCrawler-");
+        Path outputDir = getNewOutputDir("allHeavyHangsStarvedCrawler-");
         Map<String, String> args = getDefaultArgs("heavy_heavy_hangs", outputDir);
         args.put("numConsumers", "2");
         args.put("maxQueueSize", "2");
         args.put("timeoutThresholdMillis", "100000000");//make sure that the batch process doesn't time out
         BatchProcessTestExecutor ex = new BatchProcessTestExecutor(args);
         StreamStrings streamStrings = ex.execute();
-        assertEquals(2, outputDir.listFiles().length);
+        assertEquals(2, countChildren(outputDir));
 
         for (int i = 0; i < 2; i++){
-            File hvyHang = new File(outputDir, "test"+i+"_heavy_hang.xml.xml");
-            assertTrue(hvyHang.exists());
-            assertEquals(0, hvyHang.length());
+            Path hvyHang = outputDir.resolve("test"+i+"_heavy_hang.xml.xml");
+            assertTrue(Files.exists(hvyHang));
+            assertEquals(0, Files.size(hvyHang));
         }
         assertContains(BatchProcess.BATCH_CONSTANTS.BATCH_PROCESS_FATAL_MUST_RESTART.toString(),
                 streamStrings.getErrString());
@@ -125,7 +128,7 @@ public class BatchProcessTest extends FS
         //no consumers should process test2-4.txt!
         //i.e. the first consumer will finish in 10 seconds and
         //then otherwise would be looking for more, but the oom should prevent that
-        File outputDir = getNewOutputDir("oom-");
+        Path outputDir = getNewOutputDir("oom-");
 
         Map<String, String> args = getDefaultArgs("oom", outputDir);
         args.put("numConsumers", "3");
@@ -134,9 +137,9 @@ public class BatchProcessTest extends FS
         BatchProcessTestExecutor ex = new BatchProcessTestExecutor(args);
         StreamStrings streamStrings = ex.execute();
 
-        assertEquals(4, outputDir.listFiles().length);
+        assertEquals(4, countChildren(outputDir));
         assertContains("This is tika-batch's first test file",
-                FileUtils.readFileToString(new File(outputDir, "test2_ok.xml.xml"), UTF_8));
+                readFileToString(outputDir.resolve("test2_ok.xml.xml"), UTF_8));
 
         assertContains(BatchProcess.BATCH_CONSTANTS.BATCH_PROCESS_FATAL_MUST_RESTART.toString(),
                 streamStrings.getErrString());
@@ -146,7 +149,7 @@ public class BatchProcessTest extends FS
 
     @Test(timeout = 15000)
     public void noRestart() throws Exception {
-        File outputDir = getNewOutputDir("no_restart");
+        Path outputDir = getNewOutputDir("no_restart");
 
         Map<String, String> args = getDefaultArgs("no_restart", outputDir);
         args.put("numConsumers", "1");
@@ -154,12 +157,11 @@ public class BatchProcessTest extends FS
         BatchProcessTestExecutor ex = new BatchProcessTestExecutor(args);
 
         StreamStrings streamStrings = ex.execute();
-        File[] files = outputDir.listFiles();
-        File test2 = new File(outputDir, "test2_norestart.xml.xml");
-        assertTrue("test2_norestart.xml", test2.exists());
-        File test3 = new File(outputDir, "test3_ok.xml.xml");
-        assertFalse("test3_ok.xml", test3.exists());
-        assertEquals(0, test3.length());
+
+        Path test2 = outputDir.resolve("test2_norestart.xml.xml");
+        assertTrue("test2_norestart.xml", Files.exists(test2));
+        Path test3 = outputDir.resolve("test3_ok.xml.xml");
+        assertFalse("test3_ok.xml", Files.exists(test3));
         assertContains("exitStatus="+ BatchProcessDriverCLI.PROCESS_NO_RESTART_EXIT_CODE,
                 streamStrings.getOutString());
         assertContains("causeForTermination='MAIN_LOOP_EXCEPTION_NO_RESTART'",
@@ -175,7 +177,7 @@ public class BatchProcessTest extends FS
      */
     @Test(timeout = 60000)
     public void testWaitAfterEarlyTermination() throws Exception {
-        File outputDir = getNewOutputDir("wait_after_early_termination");
+        Path outputDir = getNewOutputDir("wait_after_early_termination");
 
         Map<String, String> args = getDefaultArgs("wait_after_early_termination", outputDir);
         args.put("numConsumers", "1");
@@ -186,19 +188,19 @@ public class BatchProcessTest extends FS
         BatchProcessTestExecutor ex = new BatchProcessTestExecutor(args);
 
         StreamStrings streamStrings = ex.execute();
-        File[] files = outputDir.listFiles();
-        assertEquals(1, files.length);
+        assertEquals(1, countChildren(outputDir));
         assertContains("<p>some content</p>",
-                FileUtils.readFileToString(new File(outputDir, "test0_sleep.xml.xml"), UTF_8));
+                readFileToString(outputDir.resolve("test0_sleep.xml.xml"), UTF_8));
 
-        assertContains("exitStatus="+BatchProcessDriverCLI.PROCESS_RESTART_EXIT_CODE, streamStrings.getOutString());
+        assertContains("exitStatus="+BatchProcessDriverCLI.PROCESS_RESTART_EXIT_CODE,
+                streamStrings.getOutString());
         assertContains("causeForTermination='BATCH_PROCESS_ALIVE_TOO_LONG'",
                 streamStrings.getOutString());
     }
 
     @Test(timeout = 60000)
     public void testTimeOutAfterBeingAskedToShutdown() throws Exception {
-        File outputDir = getNewOutputDir("timeout_after_early_termination");
+        Path outputDir = getNewOutputDir("timeout_after_early_termination");
 
         Map<String, String> args = getDefaultArgs("timeout_after_early_termination", outputDir);
         args.put("numConsumers", "1");
@@ -208,9 +210,9 @@ public class BatchProcessTest extends FS
 
         BatchProcessTestExecutor ex = new BatchProcessTestExecutor(args);
         StreamStrings streamStrings = ex.execute();
-        File[] files = outputDir.listFiles();
-        assertEquals(1, files.length);
-        assertEquals(0, files[0].length());
+        List<Path> paths = listPaths(outputDir);
+        assertEquals(1, paths.size());
+        assertEquals(0, Files.size(paths.get(0)));
         assertContains("exitStatus="+BatchProcessDriverCLI.PROCESS_RESTART_EXIT_CODE, streamStrings.getOutString());
         assertContains("causeForTermination='BATCH_PROCESS_ALIVE_TOO_LONG'",
                 streamStrings.getOutString());
@@ -219,7 +221,7 @@ public class BatchProcessTest extends FS
     @Test(timeout = 10000)
     public void testRedirectionOfStreams() throws Exception {
         //test redirection of system.err to system.out
-        File outputDir = getNewOutputDir("noisy_parsers");
+        Path outputDir = getNewOutputDir("noisy_parsers");
 
         Map<String, String> args = getDefaultArgs("noisy_parsers", outputDir);
         args.put("numConsumers", "1");
@@ -227,8 +229,7 @@ public class BatchProcessTest extends FS
 
         BatchProcessTestExecutor ex = new BatchProcessTestExecutor(args);
         StreamStrings streamStrings = ex.execute();
-        File[] files = outputDir.listFiles();
-        assertEquals(1, files.length);
+        assertEquals(1, countChildren(outputDir));
         assertContains("System.out", streamStrings.getOutString());
         assertContains("System.err", streamStrings.getOutString());
         assertEquals(0, streamStrings.getErrString().length());
@@ -237,7 +238,7 @@ public class BatchProcessTest extends FS
 
     @Test(timeout = 10000)
     public void testConsumersManagerInitHang() throws Exception {
-        File outputDir = getNewOutputDir("init_hang");
+        Path outputDir = getNewOutputDir("init_hang");
 
         Map<String, String> args = getDefaultArgs("noisy_parsers", outputDir);
         args.put("numConsumers", "1");
@@ -250,7 +251,7 @@ public class BatchProcessTest extends FS
 
     @Test(timeout = 10000)
     public void testConsumersManagerShutdownHang() throws Exception {
-        File outputDir = getNewOutputDir("shutdown_hang");
+        Path outputDir = getNewOutputDir("shutdown_hang");
 
         Map<String, String> args = getDefaultArgs("noisy_parsers", outputDir);
         args.put("numConsumers", "1");
@@ -267,26 +268,27 @@ public class BatchProcessTest extends FS
         //tests to make sure that hierarchy is maintained when reading from
         //file list
         //also tests that list actually works.
-        File outputDir = getNewOutputDir("hierarchical_file_list");
+        Path outputDir = getNewOutputDir("hierarchical_file_list");
 
         Map<String, String> args = getDefaultArgs("hierarchical", outputDir);
         args.put("numConsumers", "1");
-        args.put("fileList", this.getClass().getResource("/testFileList.txt").getPath());
+        args.put("fileList",
+                Paths.get(this.getClass().getResource("/testFileList.txt").toURI()).toString());
         args.put("recursiveParserWrapper", "true");
         args.put("basicHandlerType", "text");
         args.put("outputSuffix", "json");
         BatchProcessTestExecutor ex = new BatchProcessTestExecutor(args, "/tika-batch-config-MockConsumersBuilder.xml");
         ex.execute();
-        File test1 = new File(outputDir, "test1.xml.json");
-        File test2 = new File(outputDir, "sub1a/test2.xml.json");
-        File test3 = new File(outputDir, "sub1a/sub2a/test3.xml.json");
-        assertTrue("test1 exists", test1.exists());
-        assertTrue("test1 length > 10", test1.length() > 10);
-        assertTrue(test3.exists() && test3.length() > 10);
-        File test2Dir = new File(outputDir, "sub1a");
+        Path test1 = outputDir.resolve("test1.xml.json");
+        Path test2 = outputDir.resolve("sub1a/test2.xml.json");
+        Path test3 = outputDir.resolve("sub1a/sub2a/test3.xml.json");
+        assertTrue("test1 exists", Files.exists(test1));
+        assertTrue("test1 length > 10", Files.size(test1) > 10);
+        assertTrue(Files.exists(test3) && Files.size(test3) > 10);
+        Path test2Dir = outputDir.resolve("sub1a");
         //should be just the subdirectory, no actual test2 file
-        assertEquals(1, test2Dir.listFiles().length);
-        assertFalse(test2.exists());
+        assertEquals(1, countChildren(test2Dir));
+        assertFalse(Files.exists(test2));
     }
 
     private class BatchProcessTestExecutor {