You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2015/09/30 17:59:57 UTC

svn commit: r1706060 [2/2] - in /tika/trunk: ./ tika-app/src/main/java/org/apache/tika/cli/ tika-batch/src/main/java/org/apache/tika/batch/ tika-batch/src/main/java/org/apache/tika/batch/fs/ tika-batch/src/main/java/org/apache/tika/batch/fs/builders/ t...

Modified: tika/trunk/tika-batch/src/test/java/org/apache/tika/batch/fs/FSBatchTestBase.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-batch/src/test/java/org/apache/tika/batch/fs/FSBatchTestBase.java?rev=1706060&r1=1706059&r2=1706060&view=diff
==============================================================================
--- tika/trunk/tika-batch/src/test/java/org/apache/tika/batch/fs/FSBatchTestBase.java (original)
+++ tika/trunk/tika-batch/src/test/java/org/apache/tika/batch/fs/FSBatchTestBase.java Wed Sep 30 15:59:57 2015
@@ -17,11 +17,21 @@ package org.apache.tika.batch.fs;
  * limitations under the License.
  */
 
-import java.io.File;
+import java.io.BufferedReader;
 import java.io.IOException;
 import java.io.InputStream;
+import java.net.URISyntaxException;
+import java.nio.charset.Charset;
+import java.nio.file.DirectoryStream;
+import java.nio.file.FileVisitResult;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.nio.file.SimpleFileVisitor;
+import java.nio.file.attribute.BasicFileAttributes;
 import java.util.ArrayList;
 import java.util.HashMap;
+import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
 import java.util.concurrent.ExecutorService;
@@ -29,7 +39,6 @@ import java.util.concurrent.Executors;
 import java.util.concurrent.Future;
 import java.util.concurrent.TimeUnit;
 
-import org.apache.commons.io.FileUtils;
 import org.apache.commons.io.IOUtils;
 import org.apache.tika.TikaTest;
 import org.apache.tika.batch.BatchProcess;
@@ -56,17 +65,13 @@ import org.junit.BeforeClass;
  */
 public abstract class FSBatchTestBase extends TikaTest {
 
-    private static File outputRoot = null;
+    private static Path outputRoot = null;
 
     @BeforeClass
     public static void setUp() throws Exception {
-
-        File testOutput = new File("target/test-classes/test-output");
-        testOutput.mkdirs();
-        outputRoot = File.createTempFile("tika-batch-output-root-", "", testOutput);
-        outputRoot.delete();
-        outputRoot.mkdirs();
-
+        Path testOutput = Paths.get("target/test-classes/test-output");
+        Files.createDirectories(testOutput);
+        outputRoot = Files.createTempDirectory(testOutput, "tika-batch-output-root-");
     }
 
     @AfterClass
@@ -75,7 +80,7 @@ public abstract class FSBatchTestBase ex
         //see caveat in TikaCLITest's textExtract
 
         try {
-            FileUtils.deleteDirectory(outputRoot);
+            deleteDirectory(outputRoot);
         } catch (IOException e) {
             e.printStackTrace();
         }
@@ -92,32 +97,33 @@ public abstract class FSBatchTestBase ex
         }
     }
 
-    File getNewOutputDir(String subdirPrefix) throws IOException {
-        File outputDir = File.createTempFile(subdirPrefix, "", outputRoot);
-        outputDir.delete();
-        outputDir.mkdirs();
+    Path getNewOutputDir(String subdirPrefix) throws IOException {
+        Path outputDir = Files.createTempDirectory(outputRoot, subdirPrefix);
+        assert(countChildren(outputDir) == 0);
         return outputDir;
     }
 
-    Map<String, String> getDefaultArgs(String inputSubDir, File outputDir) throws Exception {
-        Map<String, String> args = new HashMap<String, String>();
-        args.put("inputDir", "\""+getInputRoot(inputSubDir).getAbsolutePath()+"\"");
+    Map<String, String> getDefaultArgs(String inputSubDir, Path outputDir) throws Exception {
+        Map<String, String> args = new HashMap<>();
+
+        args.put("inputDir", "\""+getInputRoot(inputSubDir).toString()+"\"");
         if (outputDir != null) {
-            args.put("outputDir", "\""+outputDir.getAbsolutePath()+"\"");
+            args.put("outputDir", "\""+outputDir.toString()+"\"");
         }
         return args;
     }
 
-    public String[] getDefaultCommandLineArgsArr(String inputSubDir, File outputDir, Map<String, String> commandLine) throws Exception {
-        List<String> args = new ArrayList<String>();
+    public String[] getDefaultCommandLineArgsArr(String inputSubDir,
+                                                 Path outputDir, Map<String, String> commandLine) throws Exception {
+        List<String> args = new ArrayList<>();
         //need to include "-" because these are going to the commandline!
         if (inputSubDir != null) {
             args.add("-inputDir");
-            args.add(getInputRoot(inputSubDir).getAbsolutePath());
+            args.add(getInputRoot(inputSubDir).toAbsolutePath().toString());
         }
         if (outputDir != null) {
             args.add("-outputDir");
-            args.add(outputDir.getAbsolutePath());
+            args.add(outputDir.toAbsolutePath().toString());
         }
         if (commandLine != null) {
             for (Map.Entry<String, String> e : commandLine.entrySet()) {
@@ -129,9 +135,9 @@ public abstract class FSBatchTestBase ex
     }
 
 
-    public File getInputRoot(String subdir) throws Exception {
+    public Path getInputRoot(String subdir) throws Exception {
         String path = (subdir == null || subdir.length() == 0) ? "/test-input" : "/test-input/"+subdir;
-        return new File(this.getClass().getResource(path).toURI());
+        return Paths.get(this.getClass().getResource(path).toURI());
     }
 
     BatchProcess getNewBatchRunner(String testConfig,
@@ -145,18 +151,19 @@ public abstract class FSBatchTestBase ex
     }
 
     public ProcessBuilder getNewBatchRunnerProcess(String testConfig, Map<String, String> args) {
-        List<String> argList = new ArrayList<String>();
+        List<String> argList = new ArrayList<>();
         for (Map.Entry<String, String> e : args.entrySet()) {
             argList.add("-"+e.getKey());
             argList.add(e.getValue());
         }
 
-        String[] fullCommandLine = commandLine(testConfig, argList.toArray(new String[argList.size()]));
+        String[] fullCommandLine = commandLine(testConfig,
+                argList.toArray(new String[argList.size()]));
         return new ProcessBuilder(fullCommandLine);
     }
 
     private String[] commandLine(String testConfig, String[] args) {
-        List<String> commandLine = new ArrayList<String>();
+        List<String> commandLine = new ArrayList<>();
         commandLine.add("java");
         commandLine.add("-Dlog4j.configuration=file:"+
             this.getClass().getResource("/log4j_process.properties").getFile());
@@ -171,9 +178,14 @@ public abstract class FSBatchTestBase ex
         commandLine.add(cp);
         commandLine.add("org.apache.tika.batch.fs.FSBatchProcessCLI");
 
-        String configFile = this.getClass().getResource(testConfig).getFile();
-        commandLine.add("-bc");
+        String configFile = null;
+        try {
+            configFile = Paths.get(this.getClass().getResource(testConfig).toURI()).toAbsolutePath().toString();
+        } catch (URISyntaxException e) {
+            e.printStackTrace();
+        }
 
+        commandLine.add("-bc");
         commandLine.add(configFile);
 
         for (String s : args) {
@@ -184,7 +196,7 @@ public abstract class FSBatchTestBase ex
 
     public BatchProcessDriverCLI getNewDriver(String testConfig,
                                               String[] args) throws Exception {
-        List<String> commandLine = new ArrayList<String>();
+        List<String> commandLine = new ArrayList<>();
         commandLine.add("java");
         commandLine.add("-Xmx128m");
         commandLine.add("-cp");
@@ -197,7 +209,8 @@ public abstract class FSBatchTestBase ex
         commandLine.add(cp);
         commandLine.add("org.apache.tika.batch.fs.FSBatchProcessCLI");
 
-        String configFile = this.getClass().getResource(testConfig).getFile();
+        String configFile = Paths.get(
+                this.getClass().getResource(testConfig).toURI()).toAbsolutePath().toString();
         commandLine.add("-bc");
 
         commandLine.add(configFile);
@@ -217,4 +230,72 @@ public abstract class FSBatchTestBase ex
         Future<ParallelFileProcessingResult> futureResult = executor.submit(process);
         return futureResult.get(10, TimeUnit.SECONDS);
     }
+
+    /**
+     * Counts the immediate children of a directory; does not descend into
+     * subdirectories.
+     *
+     * @param p directory whose entries are counted
+     * @return number of entries found directly under <code>p</code>
+     * @throws IOException if the directory cannot be listed
+     */
+    public static int countChildren(Path p) throws IOException {
+        int total = 0;
+        try (DirectoryStream<Path> entries = Files.newDirectoryStream(p)) {
+            for (Path ignored : entries) {
+                total++;
+            }
+        }
+        return total;
+    }
+
+    //TODO: remove this and use FileUtils once a java 7 option has been added.
+    //Reads p line by line with charset cs; every line read is terminated with
+    //"\n" in the result, whatever line ending the file itself uses.
+    public static String readFileToString(Path p, Charset cs) throws IOException {
+        StringBuilder text = new StringBuilder();
+        try (BufferedReader reader = Files.newBufferedReader(p, cs)) {
+            for (String row = reader.readLine(); row != null; row = reader.readLine()) {
+                text.append(row).append("\n");
+            }
+        }
+        return text.toString();
+    }
+
+    //TODO: move this into FileUtils. Recursively deletes dir and everything beneath it.
+    public static void deleteDirectory(Path dir) throws IOException {
+        Files.walkFileTree(dir, new SimpleFileVisitor<Path>() {
+            @Override
+            public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) throws IOException {
+                Files.delete(file);
+                return FileVisitResult.CONTINUE;
+            }
+
+            @Override
+            public FileVisitResult postVisitDirectory(Path dir, IOException exc) throws IOException {
+                if (exc != null) {
+                    throw exc; //listing failed: report that, not a follow-on delete failure
+                }
+                Files.delete(dir);
+                return FileVisitResult.CONTINUE;
+            }
+        });
+    }
+
+    /**
+     * Helper analogous to File#listFiles(): collects the immediate children
+     * of a directory without walking into subdirectories.
+     * @param p directory to list
+     * @return the entries directly under <code>p</code>, in directory-stream order
+     * @throws IOException if the directory cannot be listed
+     */
+    public static List<Path> listPaths(Path p) throws IOException {
+        List<Path> children = new ArrayList<>();
+        try (DirectoryStream<Path> stream = Files.newDirectoryStream(p)) {
+            for (Path child : stream) {
+                children.add(child);
+            }
+        }
+        return children;
+    }
 }

Added: tika/trunk/tika-batch/src/test/java/org/apache/tika/batch/fs/FSFileResourceTest.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-batch/src/test/java/org/apache/tika/batch/fs/FSFileResourceTest.java?rev=1706060&view=auto
==============================================================================
--- tika/trunk/tika-batch/src/test/java/org/apache/tika/batch/fs/FSFileResourceTest.java (added)
+++ tika/trunk/tika-batch/src/test/java/org/apache/tika/batch/fs/FSFileResourceTest.java Wed Sep 30 15:59:57 2015
@@ -0,0 +1,49 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.batch.fs;
+
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+
+import java.nio.file.Path;
+import java.nio.file.Paths;
+
+import org.junit.Test;
+
+public class FSFileResourceTest {
+    @Test
+    public void testRelativization() throws Exception {
+        //test assertion error if alleged child is not actually child
+        Path root = Paths.get("root/abc/def").toAbsolutePath();
+        Path allegedChild = Paths.get(root.getParent().getParent().toAbsolutePath().toString());
+        boolean rejected = false;
+        try {
+            new FSFileResource(root, allegedChild);
+        } catch (AssertionError e) {
+            rejected = true; //flag it: a fail() inside the try would be swallowed by this catch
+        }
+        assertTrue("should have had assertion error: alleged child not actually child of root", rejected);
+        //test regular workings
+        root = Paths.get("root/abc/def");
+        Path child = Paths.get(root.toString(), "ghi/jkl/lmnop.doc");
+        FSFileResource r = new FSFileResource(root, child);
+        String id = r.getResourceId();
+        assertTrue(id.startsWith("ghi"));
+        assertTrue(id.endsWith("lmnop.doc"));
+    }
+}

Added: tika/trunk/tika-batch/src/test/java/org/apache/tika/batch/fs/FSUtilTest.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-batch/src/test/java/org/apache/tika/batch/fs/FSUtilTest.java?rev=1706060&view=auto
==============================================================================
--- tika/trunk/tika-batch/src/test/java/org/apache/tika/batch/fs/FSUtilTest.java (added)
+++ tika/trunk/tika-batch/src/test/java/org/apache/tika/batch/fs/FSUtilTest.java Wed Sep 30 15:59:57 2015
@@ -0,0 +1,49 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.batch.fs;
+
+import static org.junit.Assert.assertTrue;
+
+import java.nio.file.Path;
+import java.nio.file.Paths;
+
+import org.junit.Test;
+
+/**
+ * Exercises FSUtil.resolveRelative with paths written in absolute form and
+ * expects an IllegalArgumentException from at least one of them.
+ */
+public class FSUtilTest {
+
+    @Test
+    public void testSafeResolution() throws Exception {
+        Path base = Paths.get(".");
+        //one Windows-style and one Linux-style absolute path
+        String[] candidates = {"C:/temp/file.txt", "/root/dir/file.txt"};
+        int rejected = 0;
+        for (String candidate : candidates) {
+            try {
+                FSUtil.resolveRelative(base, candidate);
+            } catch (IllegalArgumentException e) {
+                rejected++;
+            }
+        }
+        //NOTE(review): one rejection suffices, presumably as only one form is absolute per platform
+        assertTrue("IllegalArgumentException should have been thrown", rejected > 0);
+    }
+
+}

Modified: tika/trunk/tika-batch/src/test/java/org/apache/tika/batch/fs/HandlerBuilderTest.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-batch/src/test/java/org/apache/tika/batch/fs/HandlerBuilderTest.java?rev=1706060&r1=1706059&r2=1706060&view=diff
==============================================================================
--- tika/trunk/tika-batch/src/test/java/org/apache/tika/batch/fs/HandlerBuilderTest.java (original)
+++ tika/trunk/tika-batch/src/test/java/org/apache/tika/batch/fs/HandlerBuilderTest.java Wed Sep 30 15:59:57 2015
@@ -21,10 +21,9 @@ import static java.nio.charset.StandardC
 import static org.junit.Assert.assertFalse;
 import static org.junit.Assert.assertTrue;
 
-import java.io.File;
+import java.nio.file.Path;
 import java.util.Map;
 
-import org.apache.commons.io.FileUtils;
 import org.apache.tika.batch.BatchProcess;
 import org.apache.tika.batch.ParallelFileProcessingResult;
 import org.junit.Test;
@@ -34,15 +33,15 @@ public class HandlerBuilderTest extends
     @Test
     public void testXML() throws Exception {
 
-        File outputDir = getNewOutputDir("handler-xml-");
+        Path outputDir = getNewOutputDir("handler-xml-");
         Map<String, String> args = getDefaultArgs("basic", outputDir);
         args.put("basicHandlerType", "xml");
         args.put("outputSuffix", "xml");
 
         BatchProcess runner = getNewBatchRunner("/tika-batch-config-test.xml", args);
         ParallelFileProcessingResult result = run(runner);
-        File outputFile = new File(outputDir, "test0.xml.xml");
-        String resultString = FileUtils.readFileToString(outputFile, UTF_8);
+        Path outputFile = outputDir.resolve("test0.xml.xml");
+        String resultString = readFileToString(outputFile, UTF_8);
         assertTrue(resultString.contains("<html xmlns=\"http://www.w3.org/1999/xhtml\">"));
         assertTrue(resultString.contains("<?xml version=\"1.0\" encoding=\"UTF-8\"?>"));
         assertTrue(resultString.contains("This is tika-batch's first test file"));
@@ -51,15 +50,15 @@ public class HandlerBuilderTest extends
 
     @Test
     public void testHTML() throws Exception {
-        File outputDir = getNewOutputDir("handler-html-");
+        Path outputDir = getNewOutputDir("handler-html-");
 
         Map<String, String> args = getDefaultArgs("basic", outputDir);
         args.put("basicHandlerType", "html");
         args.put("outputSuffix", "html");
         BatchProcess runner = getNewBatchRunner("/tika-batch-config-test.xml", args);
         ParallelFileProcessingResult result = run(runner);
-        File outputFile = new File(outputDir, "test0.xml.html");
-        String resultString = FileUtils.readFileToString(outputFile, UTF_8);
+        Path outputFile = outputDir.resolve("test0.xml.html");
+        String resultString = readFileToString(outputFile, UTF_8);
         assertTrue(resultString.contains("<html xmlns=\"http://www.w3.org/1999/xhtml\">"));
         assertFalse(resultString.contains("<?xml version=\"1.0\" encoding=\"UTF-8\"?>"));
         assertTrue(resultString.contains("This is tika-batch's first test file"));
@@ -67,7 +66,7 @@ public class HandlerBuilderTest extends
 
     @Test
     public void testText() throws Exception {
-        File outputDir = getNewOutputDir("handler-txt-");
+        Path outputDir = getNewOutputDir("handler-txt-");
 
         Map<String, String> args = getDefaultArgs("basic", outputDir);
         args.put("basicHandlerType", "txt");
@@ -75,8 +74,8 @@ public class HandlerBuilderTest extends
 
         BatchProcess runner = getNewBatchRunner("/tika-batch-config-test.xml", args);
         ParallelFileProcessingResult result = run(runner);
-        File outputFile = new File(outputDir, "test0.xml.txt");
-        String resultString = FileUtils.readFileToString(outputFile, UTF_8);
+        Path outputFile = outputDir.resolve("test0.xml.txt");
+        String resultString = readFileToString(outputFile, UTF_8);
         assertFalse(resultString.contains("<html xmlns=\"http://www.w3.org/1999/xhtml\">"));
         assertFalse(resultString.contains("<?xml version=\"1.0\" encoding=\"UTF-8\"?>"));
         assertTrue(resultString.contains("This is tika-batch's first test file"));
@@ -85,7 +84,7 @@ public class HandlerBuilderTest extends
 
     @Test
     public void testXMLWithWriteLimit() throws Exception {
-        File outputDir = getNewOutputDir("handler-xml-write-limit-");
+        Path outputDir = getNewOutputDir("handler-xml-write-limit-");
 
         Map<String, String> args = getDefaultArgs("basic", outputDir);
         args.put("writeLimit", "5");
@@ -93,8 +92,8 @@ public class HandlerBuilderTest extends
         BatchProcess runner = getNewBatchRunner("/tika-batch-config-test.xml", args);
         ParallelFileProcessingResult result = run(runner);
 
-        File outputFile = new File(outputDir, "test0.xml.xml");
-        String resultString = FileUtils.readFileToString(outputFile, UTF_8);
+        Path outputFile = outputDir.resolve("test0.xml.xml");
+        String resultString = readFileToString(outputFile, UTF_8);
         //this is not ideal. How can we change handlers to writeout whatever
         //they've gotten so far, up to the writeLimit?
         assertTrue(resultString.equals(""));
@@ -102,7 +101,7 @@ public class HandlerBuilderTest extends
 
     @Test
     public void testRecursiveParserWrapper() throws Exception {
-        File outputDir = getNewOutputDir("handler-recursive-parser");
+        Path outputDir = getNewOutputDir("handler-recursive-parser");
 
         Map<String, String> args = getDefaultArgs("basic", outputDir);
         args.put("basicHandlerType", "txt");
@@ -111,8 +110,8 @@ public class HandlerBuilderTest extends
 
         BatchProcess runner = getNewBatchRunner("/tika-batch-config-test.xml", args);
         ParallelFileProcessingResult result = run(runner);
-        File outputFile = new File(outputDir, "test0.xml.json");
-        String resultString = FileUtils.readFileToString(outputFile, UTF_8);
+        Path outputFile = outputDir.resolve("test0.xml.json");
+        String resultString = readFileToString(outputFile, UTF_8);
         assertTrue(resultString.contains("\"author\":\"Nikolai Lobachevsky\""));
         assertTrue(resultString.contains("tika-batch\\u0027s first test file"));
     }

Modified: tika/trunk/tika-batch/src/test/java/org/apache/tika/batch/fs/OutputStreamFactoryTest.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-batch/src/test/java/org/apache/tika/batch/fs/OutputStreamFactoryTest.java?rev=1706060&r1=1706059&r2=1706060&view=diff
==============================================================================
--- tika/trunk/tika-batch/src/test/java/org/apache/tika/batch/fs/OutputStreamFactoryTest.java (original)
+++ tika/trunk/tika-batch/src/test/java/org/apache/tika/batch/fs/OutputStreamFactoryTest.java Wed Sep 30 15:59:57 2015
@@ -20,7 +20,7 @@ package org.apache.tika.batch.fs;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertTrue;
 
-import java.io.File;
+import java.nio.file.Path;
 import java.util.Map;
 import java.util.concurrent.ExecutionException;
 
@@ -33,11 +33,11 @@ public class OutputStreamFactoryTest ext
 
     @Test
     public void testIllegalState() throws Exception {
-        File outputDir = getNewOutputDir("os-factory-illegal-state-");
+        Path outputDir = getNewOutputDir("os-factory-illegal-state-");
         Map<String, String> args = getDefaultArgs("basic", outputDir);
         BatchProcess runner = getNewBatchRunner("/tika-batch-config-test.xml", args);
         run(runner);
-        assertEquals(1, outputDir.listFiles().length);
+        assertEquals(1, countChildren(outputDir));
 
         boolean illegalState = false;
         try {
@@ -52,16 +52,16 @@ public class OutputStreamFactoryTest ext
 
     @Test
     public void testSkip() throws Exception {
-        File outputDir = getNewOutputDir("os-factory-skip-");
+        Path outputDir = getNewOutputDir("os-factory-skip-");
         Map<String, String> args = getDefaultArgs("basic", outputDir);
         args.put("handleExisting", "skip");
         BatchProcess runner = getNewBatchRunner("/tika-batch-config-test.xml", args);
         ParallelFileProcessingResult result = run(runner);
-        assertEquals(1, outputDir.listFiles().length);
+        assertEquals(1, countChildren(outputDir));
 
         runner = getNewBatchRunner("/tika-batch-config-test.xml", args);
         result = run(runner);
-        assertEquals(1, outputDir.listFiles().length);
+        assertEquals(1, countChildren(outputDir));
     }
 
     /* turn this back on if there is any need to add "handleExisting"