You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2016/02/04 16:47:45 UTC
[2/2] tika git commit: TIKA-1851:factor out test resources that used
to be in core to test-resources module
TIKA-1851:factor out test resources that used to be in core to test-resources module
Project: http://git-wip-us.apache.org/repos/asf/tika/repo
Commit: http://git-wip-us.apache.org/repos/asf/tika/commit/afb6cf26
Tree: http://git-wip-us.apache.org/repos/asf/tika/tree/afb6cf26
Diff: http://git-wip-us.apache.org/repos/asf/tika/diff/afb6cf26
Branch: refs/heads/2.x
Commit: afb6cf2630b5006091b9862df661efa1d1ac1593
Parents: 29defdd
Author: tballison <ta...@mitre.org>
Authored: Thu Feb 4 10:47:30 2016 -0500
Committer: tballison <ta...@mitre.org>
Committed: Thu Feb 4 10:47:30 2016 -0500
----------------------------------------------------------------------
tika-batch/pom.xml | 15 +-
tika-core/pom.xml | 11 -
.../src/test/java/org/apache/tika/TikaTest.java | 214 -----------
.../tika/config/AbstractTikaConfigTest.java | 50 ---
.../org/apache/tika/config/DummyExecutor.java | 2 +-
.../org/apache/tika/config/DummyParser.java | 2 +-
.../org/apache/tika/config/TikaConfigTest.java | 29 +-
.../org/apache/tika/parser/DummyParser.java | 10 +-
.../org/apache/tika/parser/mock/MockParser.java | 365 -------------------
tika-parent/pom.xml | 1 -
tika-parser-modules/pom.xml | 5 +-
.../tika-parser-advanced-module/pom.xml | 10 +-
.../tika-parser-cad-module/pom.xml | 10 +-
.../tika-parser-code-module/pom.xml | 10 +-
.../tika-parser-crypto-module/pom.xml | 10 +-
.../tika-parser-database-module/pom.xml | 10 +-
.../tika-parser-ebook-module/pom.xml | 10 +-
.../tika-parser-journal-module/pom.xml | 10 +-
.../tika-parser-multimedia-module/pom.xml | 10 +-
.../tika/parser/ocr/TesseractOCRConfigTest.java | 9 +-
.../tika-parser-office-module/pom.xml | 10 +-
.../tika-parser-package-module/pom.xml | 10 +-
.../tika-parser-pdf-module/pom.xml | 10 +-
.../tika-parser-scientific-module/pom.xml | 10 +-
.../apache/tika/parser/dif/DIFParserTest.java | 8 +-
.../apache/tika/parser/gdal/TestGDALParser.java | 20 +-
.../tika-parser-text-module/pom.xml | 10 +-
.../tika-parser-web-module/pom.xml | 12 +-
tika-parsers/pom.xml | 10 -
.../apache/tika/parser/mock/MockParserTest.java | 247 -------------
tika-server/pom.xml | 7 -
tika-test-resources/pom.xml | 17 +-
.../src/main/java/org/apache/tika/TikaTest.java | 214 +++++++++++
.../tika/config/AbstractTikaConfigTest.java | 50 +++
.../org/apache/tika/parser/mock/MockParser.java | 365 +++++++++++++++++++
.../apache/tika/parser/mock/MockParserTest.java | 247 +++++++++++++
36 files changed, 950 insertions(+), 1090 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/tika/blob/afb6cf26/tika-batch/pom.xml
----------------------------------------------------------------------
diff --git a/tika-batch/pom.xml b/tika-batch/pom.xml
index 9401ba2..ffd29b1 100644
--- a/tika-batch/pom.xml
+++ b/tika-batch/pom.xml
@@ -69,14 +69,7 @@
<version>${commons.io.version}</version>
</dependency>
<dependency>
- <groupId>org.apache.tika</groupId>
- <artifactId>tika-core</artifactId>
- <version>${project.version}</version>
- <type>test-jar</type>
- <scope>test</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.tika</groupId>
+ <groupId>${project.groupId}</groupId>
<artifactId>tika-parsers</artifactId>
<version>${project.version}</version>
<type>test-jar</type>
@@ -87,6 +80,12 @@
<artifactId>junit</artifactId>
<scope>test</scope>
</dependency>
+ <dependency>
+ <groupId>${project.groupId}</groupId>
+ <artifactId>tika-test-resources</artifactId>
+ <version>${project.version}</version>
+ <scope>test</scope>
+ </dependency>
</dependencies>
<build>
http://git-wip-us.apache.org/repos/asf/tika/blob/afb6cf26/tika-core/pom.xml
----------------------------------------------------------------------
diff --git a/tika-core/pom.xml b/tika-core/pom.xml
index f45ac15..fbd04a7 100644
--- a/tika-core/pom.xml
+++ b/tika-core/pom.xml
@@ -123,17 +123,6 @@
</configuration>
</plugin>
<plugin>
- <groupId>org.apache.maven.plugins</groupId>
- <artifactId>maven-jar-plugin</artifactId>
- <executions>
- <execution>
- <goals>
- <goal>test-jar</goal>
- </goals>
- </execution>
- </executions>
- </plugin>
- <plugin>
<groupId>org.codehaus.mojo</groupId>
<artifactId>clirr-maven-plugin</artifactId>
<executions>
http://git-wip-us.apache.org/repos/asf/tika/blob/afb6cf26/tika-core/src/test/java/org/apache/tika/TikaTest.java
----------------------------------------------------------------------
diff --git a/tika-core/src/test/java/org/apache/tika/TikaTest.java b/tika-core/src/test/java/org/apache/tika/TikaTest.java
deleted file mode 100644
index 2c6f21f..0000000
--- a/tika-core/src/test/java/org/apache/tika/TikaTest.java
+++ /dev/null
@@ -1,214 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika;
-
-import static org.junit.Assert.assertFalse;
-import static org.junit.Assert.assertTrue;
-import static org.junit.Assert.fail;
-
-import java.io.ByteArrayOutputStream;
-import java.io.File;
-import java.io.IOException;
-import java.io.InputStream;
-import java.net.URISyntaxException;
-import java.net.URL;
-import java.util.ArrayList;
-import java.util.Collection;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Set;
-
-import org.apache.tika.extractor.EmbeddedResourceHandler;
-import org.apache.tika.io.IOUtils;
-import org.apache.tika.io.TikaInputStream;
-import org.apache.tika.metadata.Metadata;
-import org.apache.tika.mime.MediaType;
-import org.apache.tika.parser.AutoDetectParser;
-import org.apache.tika.parser.ParseContext;
-import org.apache.tika.parser.Parser;
-import org.apache.tika.sax.BodyContentHandler;
-import org.apache.tika.sax.ToXMLContentHandler;
-import org.xml.sax.ContentHandler;
-
-/**
- * Parent class of Tika tests
- */
-public abstract class TikaTest {
- /**
- * This method will give you back the filename incl. the absolute path name
- * to the resource. If the resource does not exist it will give you back the
- * resource name incl. the path.
- *
- * @param name
- * The named resource to search for.
- * @return an absolute path incl. the name which is in the same directory as
- * the class you've called it from.
- */
- public File getResourceAsFile(String name) throws URISyntaxException {
- URL url = this.getClass().getResource(name);
- if (url != null) {
- return new File(url.toURI());
- } else {
- // We have a file which does not exist
- // We got the path
- url = this.getClass().getResource(".");
- File file = new File(new File(url.toURI()), name);
- if (file == null) {
- fail("Unable to find requested file " + name);
- }
- return file;
- }
- }
-
- public InputStream getResourceAsStream(String name) {
- InputStream stream = this.getClass().getResourceAsStream(name);
- if (stream == null) {
- fail("Unable to find requested resource " + name);
- }
- return stream;
- }
-
- public static void assertContains(String needle, String haystack) {
- assertTrue(needle + " not found in:\n" + haystack, haystack.contains(needle));
- }
- public static <T> void assertContains(T needle, Collection<? extends T> haystack) {
- assertTrue(needle + " not found in:\n" + haystack, haystack.contains(needle));
- }
-
- public static void assertNotContained(String needle, String haystack) {
- assertFalse(needle + " unexpectedly found in:\n" + haystack, haystack.contains(needle));
- }
- public static <T> void assertNotContained(T needle, Collection<? extends T> haystack) {
- assertFalse(needle + " unexpectedly found in:\n" + haystack, haystack.contains(needle));
- }
-
- protected static class XMLResult {
- public final String xml;
- public final Metadata metadata;
-
- public XMLResult(String xml, Metadata metadata) {
- this.xml = xml;
- this.metadata = metadata;
- }
- }
-
- protected XMLResult getXML(String filePath, Parser parser, Metadata metadata) throws Exception {
- return getXML(getResourceAsStream("/test-documents/" + filePath), parser, metadata);
- }
-
- protected XMLResult getXML(String filePath, Metadata metadata) throws Exception {
- return getXML(getResourceAsStream("/test-documents/" + filePath), new AutoDetectParser(), metadata);
- }
-
- protected XMLResult getXML(String filePath) throws Exception {
- return getXML(getResourceAsStream("/test-documents/" + filePath), new AutoDetectParser(), new Metadata());
- }
-
- protected XMLResult getXML(InputStream input, Parser parser, Metadata metadata) throws Exception {
- ParseContext context = new ParseContext();
- context.set(Parser.class, parser);
-
- try {
- ContentHandler handler = new ToXMLContentHandler();
- parser.parse(input, handler, metadata, context);
- return new XMLResult(handler.toString(), metadata);
- } finally {
- input.close();
- }
- }
-
- /**
- * Basic text extraction.
- * <p>
- * Tries to close input stream after processing.
- */
- public String getText(InputStream is, Parser parser, ParseContext context, Metadata metadata) throws Exception{
- ContentHandler handler = new BodyContentHandler(1000000);
- try {
- parser.parse(is, handler, metadata, context);
- } finally {
- is.close();
- }
- return handler.toString();
- }
-
- public String getText(InputStream is, Parser parser, Metadata metadata) throws Exception{
- return getText(is, parser, new ParseContext(), metadata);
- }
-
- public String getText(InputStream is, Parser parser, ParseContext context) throws Exception{
- return getText(is, parser, context, new Metadata());
- }
-
- public String getText(InputStream is, Parser parser) throws Exception{
- return getText(is, parser, new ParseContext(), new Metadata());
- }
-
- /**
- * Keeps track of media types and file names recursively.
- *
- */
- public static class TrackingHandler implements EmbeddedResourceHandler {
- public List<String> filenames = new ArrayList<String>();
- public List<MediaType> mediaTypes = new ArrayList<MediaType>();
-
- private final Set<MediaType> skipTypes;
-
- public TrackingHandler() {
- skipTypes = new HashSet<MediaType>();
- }
-
- public TrackingHandler(Set<MediaType> skipTypes) {
- this.skipTypes = skipTypes;
- }
-
- @Override
- public void handle(String filename, MediaType mediaType,
- InputStream stream) {
- if (skipTypes.contains(mediaType)) {
- return;
- }
- mediaTypes.add(mediaType);
- filenames.add(filename);
- }
- }
-
- /**
- * Copies byte[] of embedded documents into a List.
- */
- public static class ByteCopyingHandler implements EmbeddedResourceHandler {
-
- public List<byte[]> bytes = new ArrayList<byte[]>();
-
- @Override
- public void handle(String filename, MediaType mediaType,
- InputStream stream) {
- ByteArrayOutputStream os = new ByteArrayOutputStream();
- if (! stream.markSupported()) {
- stream = TikaInputStream.get(stream);
- }
- stream.mark(0);
- try {
- IOUtils.copy(stream, os);
- bytes.add(os.toByteArray());
- stream.reset();
- } catch (IOException e) {
- //swallow
- }
- }
- }
-}
http://git-wip-us.apache.org/repos/asf/tika/blob/afb6cf26/tika-core/src/test/java/org/apache/tika/config/AbstractTikaConfigTest.java
----------------------------------------------------------------------
diff --git a/tika-core/src/test/java/org/apache/tika/config/AbstractTikaConfigTest.java b/tika-core/src/test/java/org/apache/tika/config/AbstractTikaConfigTest.java
deleted file mode 100644
index f817ef0..0000000
--- a/tika-core/src/test/java/org/apache/tika/config/AbstractTikaConfigTest.java
+++ /dev/null
@@ -1,50 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.config;
-
-import static org.junit.Assert.assertNotNull;
-
-import java.net.URL;
-
-import org.apache.tika.TikaTest;
-import org.apache.tika.parser.ParseContext;
-import org.junit.After;
-
-/**
- * Parent of Junit test classes for {@link TikaConfig}, including
- * Tika Core based ones, and ones in Tika Parsers that do things
- * that {@link TikaConfigTest} can't do, due to a need for the
- * full set of "real" classes of parsers / detectors
- */
-public abstract class AbstractTikaConfigTest extends TikaTest {
- protected static ParseContext context = new ParseContext();
-
- protected static String getConfigPath(String config) throws Exception {
- URL url = TikaConfig.class.getResource(config);
- assertNotNull("Test Tika Config not found: " + config, url);
- return url.toExternalForm();
- }
- protected static TikaConfig getConfig(String config) throws Exception {
- System.setProperty("tika.config", getConfigPath(config));
- return new TikaConfig();
- }
-
- @After
- public void resetConfig() {
- System.clearProperty("tika.config");
- }
-}
http://git-wip-us.apache.org/repos/asf/tika/blob/afb6cf26/tika-core/src/test/java/org/apache/tika/config/DummyExecutor.java
----------------------------------------------------------------------
diff --git a/tika-core/src/test/java/org/apache/tika/config/DummyExecutor.java b/tika-core/src/test/java/org/apache/tika/config/DummyExecutor.java
index c9b5dec..849eda3 100644
--- a/tika-core/src/test/java/org/apache/tika/config/DummyExecutor.java
+++ b/tika-core/src/test/java/org/apache/tika/config/DummyExecutor.java
@@ -22,7 +22,7 @@ import java.util.concurrent.TimeUnit;
import org.apache.tika.concurrent.ConfigurableThreadPoolExecutor;
-public class DummyExecutor extends ThreadPoolExecutor implements ConfigurableThreadPoolExecutor {
+class DummyExecutor extends ThreadPoolExecutor implements ConfigurableThreadPoolExecutor {
public DummyExecutor()
{
super(1,1, 0L, TimeUnit.SECONDS, new LinkedBlockingQueue<Runnable>());
http://git-wip-us.apache.org/repos/asf/tika/blob/afb6cf26/tika-core/src/test/java/org/apache/tika/config/DummyParser.java
----------------------------------------------------------------------
diff --git a/tika-core/src/test/java/org/apache/tika/config/DummyParser.java b/tika-core/src/test/java/org/apache/tika/config/DummyParser.java
index 78caa5c..2d20acc 100644
--- a/tika-core/src/test/java/org/apache/tika/config/DummyParser.java
+++ b/tika-core/src/test/java/org/apache/tika/config/DummyParser.java
@@ -22,7 +22,7 @@ import org.apache.tika.mime.MediaTypeRegistry;
import org.apache.tika.parser.CompositeParser;
import org.apache.tika.parser.Parser;
-public class DummyParser extends CompositeParser implements Parser {
+class DummyParser extends CompositeParser implements Parser {
private static final long serialVersionUID = 7179782154785528555L;
private ServiceLoader loader;
http://git-wip-us.apache.org/repos/asf/tika/blob/afb6cf26/tika-core/src/test/java/org/apache/tika/config/TikaConfigTest.java
----------------------------------------------------------------------
diff --git a/tika-core/src/test/java/org/apache/tika/config/TikaConfigTest.java b/tika-core/src/test/java/org/apache/tika/config/TikaConfigTest.java
index 774284c..47286ef 100644
--- a/tika-core/src/test/java/org/apache/tika/config/TikaConfigTest.java
+++ b/tika-core/src/test/java/org/apache/tika/config/TikaConfigTest.java
@@ -16,6 +16,11 @@
*/
package org.apache.tika.config;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+
import java.net.URI;
import java.net.URL;
import java.nio.file.Path;
@@ -25,9 +30,6 @@ import java.util.Map;
import java.util.concurrent.ThreadPoolExecutor;
import org.apache.tika.ResourceLoggingClassLoader;
-import org.apache.tika.config.DummyExecutor;
-import org.apache.tika.config.TikaConfig;
-import org.apache.tika.config.TikaConfigTest;
import org.apache.tika.exception.TikaException;
import org.apache.tika.parser.AutoDetectParser;
import org.apache.tika.parser.CompositeParser;
@@ -39,26 +41,21 @@ import org.apache.tika.parser.ParserDecorator;
import org.junit.Before;
import org.junit.Test;
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertNotNull;
-import static org.junit.Assert.assertTrue;
-import static org.junit.Assert.fail;
-
/**
* Tests for the Tika Config, which don't require real parsers /
* detectors / etc.
- * There's also {@link TikaParserConfigTest} and {@link TikaDetectorConfigTest}
+ * There's also TikaParserConfigTest and TikaDetectorConfigTest
* over in the Tika Parsers project, which do further Tika Config
* testing using real parsers and detectors.
*/
-public class TikaConfigTest extends AbstractTikaConfigTest {
+public class TikaConfigTest {
private ServiceLoader ignoreLoader;
private ServiceLoader warnLoader;
private ServiceLoader throwLoader;
@Before
public void setup() {
- ignoreLoader = new ServiceLoader(getClass().getClassLoader(),
+ ignoreLoader = new ServiceLoader(this.getClass().getClassLoader(),
LoadErrorHandler.IGNORE);
warnLoader = new ServiceLoader(getClass().getClassLoader(),
LoadErrorHandler.WARN);
@@ -288,4 +285,14 @@ public class TikaConfigTest extends AbstractTikaConfigTest {
assertEquals("Should have configured Core Threads", 3, executorService.getCorePoolSize());
assertEquals("Should have configured Max Threads", 10, executorService.getMaximumPoolSize());
}
+
+ protected static String getConfigPath(String config) throws Exception {
+ URL url = TikaConfig.class.getResource(config);
+ assertNotNull("Test Tika Config not found: " + config, url);
+ return url.toExternalForm();
+ }
+ protected static TikaConfig getConfig(String config) throws Exception {
+ System.setProperty("tika.config", getConfigPath(config));
+ return new TikaConfig();
+ }
}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/tika/blob/afb6cf26/tika-core/src/test/java/org/apache/tika/parser/DummyParser.java
----------------------------------------------------------------------
diff --git a/tika-core/src/test/java/org/apache/tika/parser/DummyParser.java b/tika-core/src/test/java/org/apache/tika/parser/DummyParser.java
index 1e6a377..332106a 100644
--- a/tika-core/src/test/java/org/apache/tika/parser/DummyParser.java
+++ b/tika-core/src/test/java/org/apache/tika/parser/DummyParser.java
@@ -19,8 +19,8 @@ package org.apache.tika.parser;
import java.io.IOException;
import java.io.InputStream;
import java.util.Map;
-import java.util.Set;
import java.util.Map.Entry;
+import java.util.Set;
import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
@@ -30,16 +30,16 @@ import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
/**
- * A Dummy Parser for use with unit tests.
+ * A Dummy Parser for use with unit tests in core.
* <p>
- * See also {@link org.apache.tika.parser.mock.MockParser}.
+ * See also the MockParser in tika-test-resources.
*/
-public class DummyParser extends AbstractParser {
+class DummyParser extends AbstractParser {
private Set<MediaType> types;
private Map<String,String> metadata;
private String xmlText;
- public DummyParser(Set<MediaType> types, Map<String, String> metadata,
+ DummyParser(Set<MediaType> types, Map<String, String> metadata,
String xmlText) {
this.types = types;
this.metadata = metadata;
http://git-wip-us.apache.org/repos/asf/tika/blob/afb6cf26/tika-core/src/test/java/org/apache/tika/parser/mock/MockParser.java
----------------------------------------------------------------------
diff --git a/tika-core/src/test/java/org/apache/tika/parser/mock/MockParser.java b/tika-core/src/test/java/org/apache/tika/parser/mock/MockParser.java
deleted file mode 100644
index a920502..0000000
--- a/tika-core/src/test/java/org/apache/tika/parser/mock/MockParser.java
+++ /dev/null
@@ -1,365 +0,0 @@
-package org.apache.tika.parser.mock;
-
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-
-import static java.nio.charset.StandardCharsets.UTF_8;
-
-import javax.xml.parsers.DocumentBuilder;
-import javax.xml.parsers.DocumentBuilderFactory;
-import javax.xml.parsers.ParserConfigurationException;
-import java.io.ByteArrayInputStream;
-import java.io.IOException;
-import java.io.InputStream;
-import java.lang.reflect.Constructor;
-import java.util.ArrayList;
-import java.util.Date;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Set;
-
-import org.apache.tika.exception.TikaException;
-import org.apache.tika.extractor.EmbeddedDocumentExtractor;
-import org.apache.tika.extractor.ParsingEmbeddedDocumentExtractor;
-import org.apache.tika.metadata.Metadata;
-import org.apache.tika.metadata.TikaMetadataKeys;
-import org.apache.tika.mime.MediaType;
-import org.apache.tika.parser.AbstractParser;
-import org.apache.tika.parser.ParseContext;
-import org.apache.tika.parser.Parser;
-import org.apache.tika.sax.EmbeddedContentHandler;
-import org.apache.tika.sax.XHTMLContentHandler;
-import org.w3c.dom.Document;
-import org.w3c.dom.NamedNodeMap;
-import org.w3c.dom.Node;
-import org.w3c.dom.NodeList;
-import org.xml.sax.ContentHandler;
-import org.xml.sax.SAXException;
-
-/**
- * This class enables mocking of parser behavior for use in testing
- * wrappers and drivers of parsers.
- * <p>
- * See resources/test-documents/mock/example.xml in tika-parsers/test for the documentation
- * of all the options for this MockParser.
- * <p>
- * Tests for this class are in tika-parsers.
- * <p>
- * See also {@link org.apache.tika.parser.DummyParser} for another option.
- */
-
-public class MockParser extends AbstractParser {
-
- private static final long serialVersionUID = 1L;
-
- @Override
- public Set<MediaType> getSupportedTypes(ParseContext context) {
- Set<MediaType> types = new HashSet<MediaType>();
- MediaType type = MediaType.application("mock+xml");
- types.add(type);
- return types;
- }
-
- @Override
- public void parse(InputStream stream, ContentHandler handler,
- Metadata metadata, ParseContext context) throws IOException,
- SAXException, TikaException {
- Document doc = null;
- DocumentBuilderFactory fact = DocumentBuilderFactory.newInstance();
- DocumentBuilder docBuilder = null;
- try {
- docBuilder = fact.newDocumentBuilder();
- doc = docBuilder.parse(stream);
- } catch (ParserConfigurationException e) {
- throw new IOException(e);
- } catch (SAXException e) {
- throw new IOException(e);
- }
- Node root = doc.getDocumentElement();
- NodeList actions = root.getChildNodes();
- XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
- xhtml.startDocument();
- for (int i = 0; i < actions.getLength(); i++) {
- executeAction(actions.item(i), metadata, context, xhtml);
- }
- xhtml.endDocument();
- }
-
- private void executeAction(Node action, Metadata metadata, ParseContext context,
- XHTMLContentHandler xhtml) throws SAXException,
- IOException, TikaException {
-
- if (action.getNodeType() != 1) {
- return;
- }
-
- String name = action.getNodeName();
- if ("metadata".equals(name)) {
- metadata(action, metadata);
- } else if("write".equals(name)) {
- write(action, xhtml);
- } else if ("throw".equals(name)) {
- throwIt(action);
- } else if ("hang".equals(name)) {
- hang(action);
- } else if ("oom".equals(name)) {
- kabOOM();
- } else if ("print_out".equals(name) || "print_err".equals(name)){
- print(action, name);
- } else if ("embedded".equals(name)) {
- handleEmbedded(action, xhtml, context);
- } else if ("throwIllegalChars".equals(name)) {
- throwIllegalChars();
- } else {
- throw new IllegalArgumentException("Didn't recognize mock action: "+name);
- }
- }
-
- private void throwIllegalChars() throws IOException {
- throw new IOException("Can't say \u0000 in xml or \u0001 or \u0002 or \u0003");
- }
-
- private void handleEmbedded(Node action, XHTMLContentHandler handler, ParseContext context)
- throws TikaException, SAXException, IOException {
- String fileName = "";
- String contentType = "";
- NamedNodeMap attrs = action.getAttributes();
- if (attrs != null) {
- Node n = attrs.getNamedItem("filename");
- if (n != null) {
- fileName = n.getNodeValue();
- }
- n = attrs.getNamedItem("content-type");
- if (n != null) {
- contentType = n.getNodeValue();
- }
- }
-
- String embeddedText = action.getTextContent();
- EmbeddedDocumentExtractor extractor = getEmbeddedDocumentExtractor(context);
- Metadata m = new Metadata();
- m.set(TikaMetadataKeys.RESOURCE_NAME_KEY, fileName);
- if (! "".equals(contentType)) {
- m.set(Metadata.CONTENT_TYPE, contentType);
- }
- InputStream is = new ByteArrayInputStream(embeddedText.getBytes(UTF_8));
-
- extractor.parseEmbedded(
- is,
- new EmbeddedContentHandler(handler),
- m, true);
-
-
- }
-
- protected EmbeddedDocumentExtractor getEmbeddedDocumentExtractor(ParseContext context) {
- EmbeddedDocumentExtractor extractor =
- context.get(EmbeddedDocumentExtractor.class);
- if (extractor == null) {
- Parser p = context.get(Parser.class);
- if (p == null) {
- context.set(Parser.class, new MockParser());
- }
- extractor = new ParsingEmbeddedDocumentExtractor(context);
- }
- return extractor;
- }
-
- private void print(Node action, String name) {
- String content = action.getTextContent();
- if ("print_out".equals(name)) {
- System.out.println(content);
- } else if ("print_err".equals(name)) {
- System.err.println(content);
- } else {
- throw new IllegalArgumentException("must be print_out or print_err");
- }
- }
- private void hang(Node action) {
- boolean interruptible = true;
- boolean heavy = false;
- long millis = -1;
- long pulseMillis = -1;
- NamedNodeMap attrs = action.getAttributes();
- Node iNode = attrs.getNamedItem("interruptible");
- if (iNode != null) {
- interruptible = ("true".equals(iNode.getNodeValue()));
- }
- Node hNode = attrs.getNamedItem("heavy");
- if (hNode != null) {
- heavy = ("true".equals(hNode.getNodeValue()));
- }
-
- Node mNode = attrs.getNamedItem("millis");
- if (mNode == null) {
- throw new RuntimeException("Must specify \"millis\" attribute for hang.");
- }
- String millisString = mNode.getNodeValue();
- try {
- millis = Long.parseLong(millisString);
- } catch (NumberFormatException e) {
- throw new RuntimeException("Value for \"millis\" attribute must be a long.");
- }
-
- if (heavy) {
- Node pNode = attrs.getNamedItem("pulse_millis");
- if (pNode == null) {
- throw new RuntimeException("Must specify attribute \"pulse_millis\" if the hang is \"heavy\"");
- }
- String pulseMillisString = mNode.getNodeValue();
- try {
- pulseMillis = Long.parseLong(pulseMillisString);
- } catch (NumberFormatException e) {
- throw new RuntimeException("Value for \"millis\" attribute must be a long.");
- }
- }
- if (heavy) {
- hangHeavy(millis, pulseMillis, interruptible);
- } else {
- sleep(millis, interruptible);
- }
- }
-
- private void throwIt(Node action) throws IOException,
- SAXException, TikaException {
- NamedNodeMap attrs = action.getAttributes();
- String className = attrs.getNamedItem("class").getNodeValue();
- String msg = action.getTextContent();
- throwIt(className, msg);
- }
-
- private void metadata(Node action, Metadata metadata) {
- NamedNodeMap attrs = action.getAttributes();
- //throws npe unless there is a name
- String name = attrs.getNamedItem("name").getNodeValue();
- String value = action.getTextContent();
- Node actionType = attrs.getNamedItem("action");
- if (actionType == null) {
- metadata.add(name, value);
- } else {
- if ("set".equals(actionType.getNodeValue())) {
- metadata.set(name, value);
- } else {
- metadata.add(name, value);
- }
- }
- }
-
- private void write(Node action, XHTMLContentHandler xhtml) throws SAXException {
- NamedNodeMap attrs = action.getAttributes();
- Node eNode = attrs.getNamedItem("element");
- String elementType = "p";
- if (eNode != null) {
- elementType = eNode.getTextContent();
- }
- String text = action.getTextContent();
- xhtml.startElement(elementType);
- xhtml.characters(text);
- xhtml.endElement(elementType);
- }
-
-
- private void throwIt(String className, String msg) throws IOException,
- SAXException, TikaException {
- Throwable t = null;
- if (msg == null || msg.equals("")) {
- try {
- t = (Throwable) Class.forName(className).newInstance();
- } catch (Exception e) {
- throw new RuntimeException("couldn't create throwable class:"+className, e);
- }
- } else {
- try {
- Class<?> clazz = Class.forName(className);
- Constructor<?> con = clazz.getConstructor(String.class);
- t = (Throwable) con.newInstance(msg);
- } catch (Exception e) {
- throw new RuntimeException("couldn't create throwable class:" + className, e);
- }
- }
- if (t instanceof SAXException) {
- throw (SAXException)t;
- } else if (t instanceof IOException) {
- throw (IOException) t;
- } else if (t instanceof TikaException) {
- throw (TikaException) t;
- } else if (t instanceof Error) {
- throw (Error) t;
- } else if (t instanceof RuntimeException) {
- throw (RuntimeException) t;
- } else {
- //wrap the throwable in a RuntimeException
- throw new RuntimeException(t);
- }
- }
-
- private void kabOOM() {
- List<int[]> ints = new ArrayList<int[]>();
-
- while (true) {
- int[] intArr = new int[32000];
- ints.add(intArr);
- }
- }
-
- private void hangHeavy(long maxMillis, long pulseCheckMillis, boolean interruptible) {
- //do some heavy computation and occasionally check for
- //whether time has exceeded maxMillis (see TIKA-1132 for inspiration)
- //or whether the thread was interrupted
- long start = new Date().getTime();
- int lastChecked = 0;
- while (true) {
- for (int i = 1; i < Integer.MAX_VALUE; i++) {
- for (int j = 1; j < Integer.MAX_VALUE; j++) {
- double div = (double) i / (double) j;
- lastChecked++;
- if (lastChecked > pulseCheckMillis) {
- lastChecked = 0;
- if (interruptible && Thread.currentThread().isInterrupted()) {
- return;
- }
- long elapsed = new Date().getTime()-start;
- if (elapsed > maxMillis) {
- return;
- }
- }
- }
- }
- }
- }
-
- private void sleep(long maxMillis, boolean isInterruptible) {
- long start = new Date().getTime();
- long millisRemaining = maxMillis;
- while (true) {
- try {
- Thread.sleep(millisRemaining);
- } catch (InterruptedException e) {
- if (isInterruptible) {
- return;
- }
- }
- long elapsed = new Date().getTime()-start;
- millisRemaining = maxMillis - elapsed;
- if (millisRemaining <= 0) {
- break;
- }
- }
- }
-
-}
http://git-wip-us.apache.org/repos/asf/tika/blob/afb6cf26/tika-parent/pom.xml
----------------------------------------------------------------------
diff --git a/tika-parent/pom.xml b/tika-parent/pom.xml
index 0f92da6..b5ce015 100644
--- a/tika-parent/pom.xml
+++ b/tika-parent/pom.xml
@@ -275,7 +275,6 @@
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.11</version>
- <scope>test</scope>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
http://git-wip-us.apache.org/repos/asf/tika/blob/afb6cf26/tika-parser-modules/pom.xml
----------------------------------------------------------------------
diff --git a/tika-parser-modules/pom.xml b/tika-parser-modules/pom.xml
index 932a058..724f0f9 100644
--- a/tika-parser-modules/pom.xml
+++ b/tika-parser-modules/pom.xml
@@ -61,8 +61,9 @@
<dependencies>
<!-- Test dependencies -->
<dependency>
- <groupId>junit</groupId>
- <artifactId>junit</artifactId>
+ <groupId>${project.groupId}</groupId>
+ <artifactId>tika-test-resources</artifactId>
+ <version>${project.version}</version>
<scope>test</scope>
</dependency>
<dependency>
http://git-wip-us.apache.org/repos/asf/tika/blob/afb6cf26/tika-parser-modules/tika-parser-advanced-module/pom.xml
----------------------------------------------------------------------
diff --git a/tika-parser-modules/tika-parser-advanced-module/pom.xml b/tika-parser-modules/tika-parser-advanced-module/pom.xml
index d71e4af..2e02904 100644
--- a/tika-parser-modules/tika-parser-advanced-module/pom.xml
+++ b/tika-parser-modules/tika-parser-advanced-module/pom.xml
@@ -10,7 +10,7 @@
OF ANY KIND, either express or implied. See the License for the specific
language governing permissions and limitations under the License. -->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
- xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
@@ -29,14 +29,6 @@
<artifactId>tika-core</artifactId>
<version>${project.version}</version>
</dependency>
-
- <dependency>
- <groupId>${project.groupId}</groupId>
- <artifactId>tika-core</artifactId>
- <version>${project.version}</version>
- <type>test-jar</type>
- <scope>test</scope>
- </dependency>
<dependency>
<groupId>commons-io</groupId>
<artifactId>commons-io</artifactId>
http://git-wip-us.apache.org/repos/asf/tika/blob/afb6cf26/tika-parser-modules/tika-parser-cad-module/pom.xml
----------------------------------------------------------------------
diff --git a/tika-parser-modules/tika-parser-cad-module/pom.xml b/tika-parser-modules/tika-parser-cad-module/pom.xml
index 45d44c3..c606b06 100644
--- a/tika-parser-modules/tika-parser-cad-module/pom.xml
+++ b/tika-parser-modules/tika-parser-cad-module/pom.xml
@@ -10,7 +10,7 @@
OF ANY KIND, either express or implied. See the License for the specific
language governing permissions and limitations under the License. -->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
- xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
@@ -29,14 +29,6 @@
<artifactId>tika-core</artifactId>
<version>${project.version}</version>
</dependency>
-
- <dependency>
- <groupId>${project.groupId}</groupId>
- <artifactId>tika-core</artifactId>
- <version>${project.version}</version>
- <type>test-jar</type>
- <scope>test</scope>
- </dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi</artifactId>
http://git-wip-us.apache.org/repos/asf/tika/blob/afb6cf26/tika-parser-modules/tika-parser-code-module/pom.xml
----------------------------------------------------------------------
diff --git a/tika-parser-modules/tika-parser-code-module/pom.xml b/tika-parser-modules/tika-parser-code-module/pom.xml
index 46c6039..c74455a 100644
--- a/tika-parser-modules/tika-parser-code-module/pom.xml
+++ b/tika-parser-modules/tika-parser-code-module/pom.xml
@@ -10,7 +10,7 @@
OF ANY KIND, either express or implied. See the License for the specific
language governing permissions and limitations under the License. -->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
- xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
@@ -29,14 +29,6 @@
<artifactId>tika-core</artifactId>
<version>${project.version}</version>
</dependency>
-
- <dependency>
- <groupId>${project.groupId}</groupId>
- <artifactId>tika-core</artifactId>
- <version>${project.version}</version>
- <type>test-jar</type>
- <scope>test</scope>
- </dependency>
<dependency>
<groupId>org.ow2.asm</groupId>
<artifactId>asm</artifactId>
http://git-wip-us.apache.org/repos/asf/tika/blob/afb6cf26/tika-parser-modules/tika-parser-crypto-module/pom.xml
----------------------------------------------------------------------
diff --git a/tika-parser-modules/tika-parser-crypto-module/pom.xml b/tika-parser-modules/tika-parser-crypto-module/pom.xml
index d516b60..3d41017 100644
--- a/tika-parser-modules/tika-parser-crypto-module/pom.xml
+++ b/tika-parser-modules/tika-parser-crypto-module/pom.xml
@@ -10,7 +10,7 @@
OF ANY KIND, either express or implied. See the License for the specific
language governing permissions and limitations under the License. -->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
- xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
@@ -29,15 +29,7 @@
<artifactId>tika-core</artifactId>
<version>${project.version}</version>
</dependency>
-
<dependency>
- <groupId>${project.groupId}</groupId>
- <artifactId>tika-core</artifactId>
- <version>${project.version}</version>
- <type>test-jar</type>
- <scope>test</scope>
- </dependency>
- <dependency>
<groupId>org.bouncycastle</groupId>
<artifactId>bcmail-jdk15on</artifactId>
<version>1.52</version>
http://git-wip-us.apache.org/repos/asf/tika/blob/afb6cf26/tika-parser-modules/tika-parser-database-module/pom.xml
----------------------------------------------------------------------
diff --git a/tika-parser-modules/tika-parser-database-module/pom.xml b/tika-parser-modules/tika-parser-database-module/pom.xml
index e0ce086..69f1df0 100644
--- a/tika-parser-modules/tika-parser-database-module/pom.xml
+++ b/tika-parser-modules/tika-parser-database-module/pom.xml
@@ -10,7 +10,7 @@
OF ANY KIND, either express or implied. See the License for the specific
language governing permissions and limitations under the License. -->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
- xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
@@ -29,14 +29,6 @@
<artifactId>tika-core</artifactId>
<version>${project.version}</version>
</dependency>
-
- <dependency>
- <groupId>${project.groupId}</groupId>
- <artifactId>tika-core</artifactId>
- <version>${project.version}</version>
- <type>test-jar</type>
- <scope>test</scope>
- </dependency>
<!-- Provided dependencies -->
<dependency>
<groupId>org.xerial</groupId>
http://git-wip-us.apache.org/repos/asf/tika/blob/afb6cf26/tika-parser-modules/tika-parser-ebook-module/pom.xml
----------------------------------------------------------------------
diff --git a/tika-parser-modules/tika-parser-ebook-module/pom.xml b/tika-parser-modules/tika-parser-ebook-module/pom.xml
index 6938e81..89bab53 100644
--- a/tika-parser-modules/tika-parser-ebook-module/pom.xml
+++ b/tika-parser-modules/tika-parser-ebook-module/pom.xml
@@ -10,7 +10,7 @@
OF ANY KIND, either express or implied. See the License for the specific
language governing permissions and limitations under the License. -->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
- xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
@@ -29,14 +29,6 @@
<artifactId>tika-core</artifactId>
<version>${project.version}</version>
</dependency>
-
- <dependency>
- <groupId>${project.groupId}</groupId>
- <artifactId>tika-core</artifactId>
- <version>${project.version}</version>
- <type>test-jar</type>
- <scope>test</scope>
- </dependency>
<dependency>
<groupId>${project.groupId}</groupId>
<artifactId>tika-parser-text-module</artifactId>
http://git-wip-us.apache.org/repos/asf/tika/blob/afb6cf26/tika-parser-modules/tika-parser-journal-module/pom.xml
----------------------------------------------------------------------
diff --git a/tika-parser-modules/tika-parser-journal-module/pom.xml b/tika-parser-modules/tika-parser-journal-module/pom.xml
index 1d78e63..1a29605 100644
--- a/tika-parser-modules/tika-parser-journal-module/pom.xml
+++ b/tika-parser-modules/tika-parser-journal-module/pom.xml
@@ -10,7 +10,7 @@
OF ANY KIND, either express or implied. See the License for the specific
language governing permissions and limitations under the License. -->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
- xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
@@ -33,14 +33,6 @@
<artifactId>tika-core</artifactId>
<version>${project.version}</version>
</dependency>
-
- <dependency>
- <groupId>${project.groupId}</groupId>
- <artifactId>tika-core</artifactId>
- <version>${project.version}</version>
- <type>test-jar</type>
- <scope>test</scope>
- </dependency>
<dependency>
<groupId>org.apache.cxf</groupId>
<artifactId>cxf-rt-rs-client</artifactId>
http://git-wip-us.apache.org/repos/asf/tika/blob/afb6cf26/tika-parser-modules/tika-parser-multimedia-module/pom.xml
----------------------------------------------------------------------
diff --git a/tika-parser-modules/tika-parser-multimedia-module/pom.xml b/tika-parser-modules/tika-parser-multimedia-module/pom.xml
index 7987296..f15f3bd 100644
--- a/tika-parser-modules/tika-parser-multimedia-module/pom.xml
+++ b/tika-parser-modules/tika-parser-multimedia-module/pom.xml
@@ -10,7 +10,7 @@
OF ANY KIND, either express or implied. See the License for the specific
language governing permissions and limitations under the License. -->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
- xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
@@ -35,14 +35,6 @@
<artifactId>tika-core</artifactId>
<version>${project.version}</version>
</dependency>
-
- <dependency>
- <groupId>${project.groupId}</groupId>
- <artifactId>tika-core</artifactId>
- <version>${project.version}</version>
- <type>test-jar</type>
- <scope>test</scope>
- </dependency>
<dependency>
<groupId>com.drewnoakes</groupId>
<artifactId>metadata-extractor</artifactId>
http://git-wip-us.apache.org/repos/asf/tika/blob/afb6cf26/tika-parser-modules/tika-parser-multimedia-module/src/test/java/org/apache/tika/parser/ocr/TesseractOCRConfigTest.java
----------------------------------------------------------------------
diff --git a/tika-parser-modules/tika-parser-multimedia-module/src/test/java/org/apache/tika/parser/ocr/TesseractOCRConfigTest.java b/tika-parser-modules/tika-parser-multimedia-module/src/test/java/org/apache/tika/parser/ocr/TesseractOCRConfigTest.java
index 225c7d4..36c0efe 100644
--- a/tika-parser-modules/tika-parser-multimedia-module/src/test/java/org/apache/tika/parser/ocr/TesseractOCRConfigTest.java
+++ b/tika-parser-modules/tika-parser-multimedia-module/src/test/java/org/apache/tika/parser/ocr/TesseractOCRConfigTest.java
@@ -16,15 +16,14 @@
*/
package org.apache.tika.parser.ocr;
-import org.apache.tika.TikaTest;
-import org.junit.Test;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
import java.io.File;
import java.io.InputStream;
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertTrue;
-import static org.junit.Assert.fail;
+import org.apache.tika.TikaTest;
+import org.junit.Test;
public class TesseractOCRConfigTest extends TikaTest {
http://git-wip-us.apache.org/repos/asf/tika/blob/afb6cf26/tika-parser-modules/tika-parser-office-module/pom.xml
----------------------------------------------------------------------
diff --git a/tika-parser-modules/tika-parser-office-module/pom.xml b/tika-parser-modules/tika-parser-office-module/pom.xml
index c968b08..3a8e5d2 100644
--- a/tika-parser-modules/tika-parser-office-module/pom.xml
+++ b/tika-parser-modules/tika-parser-office-module/pom.xml
@@ -10,7 +10,7 @@
OF ANY KIND, either express or implied. See the License for the specific
language governing permissions and limitations under the License. -->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
- xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
@@ -29,14 +29,6 @@
<artifactId>tika-core</artifactId>
<version>${project.version}</version>
</dependency>
-
- <dependency>
- <groupId>${project.groupId}</groupId>
- <artifactId>tika-core</artifactId>
- <version>${project.version}</version>
- <type>test-jar</type>
- <scope>test</scope>
- </dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi</artifactId>
http://git-wip-us.apache.org/repos/asf/tika/blob/afb6cf26/tika-parser-modules/tika-parser-package-module/pom.xml
----------------------------------------------------------------------
diff --git a/tika-parser-modules/tika-parser-package-module/pom.xml b/tika-parser-modules/tika-parser-package-module/pom.xml
index 6aadbdc..7a3faa9 100644
--- a/tika-parser-modules/tika-parser-package-module/pom.xml
+++ b/tika-parser-modules/tika-parser-package-module/pom.xml
@@ -10,7 +10,7 @@
OF ANY KIND, either express or implied. See the License for the specific
language governing permissions and limitations under the License. -->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
- xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
@@ -34,14 +34,6 @@
<artifactId>tika-core</artifactId>
<version>${project.version}</version>
</dependency>
-
- <dependency>
- <groupId>${project.groupId}</groupId>
- <artifactId>tika-core</artifactId>
- <version>${project.version}</version>
- <type>test-jar</type>
- <scope>test</scope>
- </dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml</artifactId>
http://git-wip-us.apache.org/repos/asf/tika/blob/afb6cf26/tika-parser-modules/tika-parser-pdf-module/pom.xml
----------------------------------------------------------------------
diff --git a/tika-parser-modules/tika-parser-pdf-module/pom.xml b/tika-parser-modules/tika-parser-pdf-module/pom.xml
index 0344c04..dfe2f0a 100644
--- a/tika-parser-modules/tika-parser-pdf-module/pom.xml
+++ b/tika-parser-modules/tika-parser-pdf-module/pom.xml
@@ -10,7 +10,7 @@
OF ANY KIND, either express or implied. See the License for the specific
language governing permissions and limitations under the License. -->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
- xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
@@ -33,14 +33,6 @@
<artifactId>tika-core</artifactId>
<version>${project.version}</version>
</dependency>
-
- <dependency>
- <groupId>${project.groupId}</groupId>
- <artifactId>tika-core</artifactId>
- <version>${project.version}</version>
- <type>test-jar</type>
- <scope>test</scope>
- </dependency>
<dependency>
<groupId>commons-io</groupId>
<artifactId>commons-io</artifactId>
http://git-wip-us.apache.org/repos/asf/tika/blob/afb6cf26/tika-parser-modules/tika-parser-scientific-module/pom.xml
----------------------------------------------------------------------
diff --git a/tika-parser-modules/tika-parser-scientific-module/pom.xml b/tika-parser-modules/tika-parser-scientific-module/pom.xml
index 46ab130..9616b77 100644
--- a/tika-parser-modules/tika-parser-scientific-module/pom.xml
+++ b/tika-parser-modules/tika-parser-scientific-module/pom.xml
@@ -10,7 +10,7 @@
OF ANY KIND, either express or implied. See the License for the specific
language governing permissions and limitations under the License. -->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
- xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
@@ -33,14 +33,6 @@
<artifactId>tika-core</artifactId>
<version>${project.version}</version>
</dependency>
-
- <dependency>
- <groupId>${project.groupId}</groupId>
- <artifactId>tika-core</artifactId>
- <version>${project.version}</version>
- <type>test-jar</type>
- <scope>test</scope>
- </dependency>
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-exec</artifactId>
http://git-wip-us.apache.org/repos/asf/tika/blob/afb6cf26/tika-parser-modules/tika-parser-scientific-module/src/test/java/org/apache/tika/parser/dif/DIFParserTest.java
----------------------------------------------------------------------
diff --git a/tika-parser-modules/tika-parser-scientific-module/src/test/java/org/apache/tika/parser/dif/DIFParserTest.java b/tika-parser-modules/tika-parser-scientific-module/src/test/java/org/apache/tika/parser/dif/DIFParserTest.java
index 9aa1268..ef31abc 100644
--- a/tika-parser-modules/tika-parser-scientific-module/src/test/java/org/apache/tika/parser/dif/DIFParserTest.java
+++ b/tika-parser-modules/tika-parser-scientific-module/src/test/java/org/apache/tika/parser/dif/DIFParserTest.java
@@ -16,6 +16,10 @@
*/
package org.apache.tika.parser.dif;
+import static org.junit.Assert.assertEquals;
+
+import java.io.InputStream;
+
import org.apache.tika.TikaTest;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.parser.ParseContext;
@@ -24,10 +28,6 @@ import org.apache.tika.sax.BodyContentHandler;
import org.junit.Test;
import org.xml.sax.ContentHandler;
-import java.io.InputStream;
-
-import static org.junit.Assert.assertEquals;
-
public class DIFParserTest extends TikaTest {
@Test
http://git-wip-us.apache.org/repos/asf/tika/blob/afb6cf26/tika-parser-modules/tika-parser-scientific-module/src/test/java/org/apache/tika/parser/gdal/TestGDALParser.java
----------------------------------------------------------------------
diff --git a/tika-parser-modules/tika-parser-scientific-module/src/test/java/org/apache/tika/parser/gdal/TestGDALParser.java b/tika-parser-modules/tika-parser-scientific-module/src/test/java/org/apache/tika/parser/gdal/TestGDALParser.java
index 92790e0..cf37989 100644
--- a/tika-parser-modules/tika-parser-scientific-module/src/test/java/org/apache/tika/parser/gdal/TestGDALParser.java
+++ b/tika-parser-modules/tika-parser-scientific-module/src/test/java/org/apache/tika/parser/gdal/TestGDALParser.java
@@ -19,27 +19,23 @@ package org.apache.tika.parser.gdal;
//JDK imports
-import java.io.IOException;
-import java.io.InputStream;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+import static org.junit.Assume.assumeTrue;
+import java.io.InputStream;
-//Tika imports
import org.apache.tika.TikaTest;
-import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.external.ExternalParser;
import org.apache.tika.sax.BodyContentHandler;
-
-//Junit imports
import org.junit.Test;
-import org.xml.sax.SAXException;
-import static org.junit.Assert.fail;
-import static org.junit.Assert.assertTrue;
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertNotNull;
-import static org.junit.Assume.assumeTrue;
+//Tika imports
+//Junit imports
/**
* Test harness for the GDAL parser.
http://git-wip-us.apache.org/repos/asf/tika/blob/afb6cf26/tika-parser-modules/tika-parser-text-module/pom.xml
----------------------------------------------------------------------
diff --git a/tika-parser-modules/tika-parser-text-module/pom.xml b/tika-parser-modules/tika-parser-text-module/pom.xml
index aaea881..1389d08 100644
--- a/tika-parser-modules/tika-parser-text-module/pom.xml
+++ b/tika-parser-modules/tika-parser-text-module/pom.xml
@@ -10,7 +10,7 @@
OF ANY KIND, either express or implied. See the License for the specific
language governing permissions and limitations under the License. -->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
- xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
@@ -33,14 +33,6 @@
<artifactId>tika-core</artifactId>
<version>${project.version}</version>
</dependency>
-
- <dependency>
- <groupId>${project.groupId}</groupId>
- <artifactId>tika-core</artifactId>
- <version>${project.version}</version>
- <type>test-jar</type>
- <scope>test</scope>
- </dependency>
<dependency>
<groupId>com.googlecode.juniversalchardet</groupId>
<artifactId>juniversalchardet</artifactId>
http://git-wip-us.apache.org/repos/asf/tika/blob/afb6cf26/tika-parser-modules/tika-parser-web-module/pom.xml
----------------------------------------------------------------------
diff --git a/tika-parser-modules/tika-parser-web-module/pom.xml b/tika-parser-modules/tika-parser-web-module/pom.xml
index e5bae33..53aadb2 100644
--- a/tika-parser-modules/tika-parser-web-module/pom.xml
+++ b/tika-parser-modules/tika-parser-web-module/pom.xml
@@ -10,7 +10,7 @@
OF ANY KIND, either express or implied. See the License for the specific
language governing permissions and limitations under the License. -->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
- xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
@@ -33,14 +33,6 @@
<artifactId>tika-core</artifactId>
<version>${project.version}</version>
</dependency>
-
- <dependency>
- <groupId>${project.groupId}</groupId>
- <artifactId>tika-core</artifactId>
- <version>${project.version}</version>
- <type>test-jar</type>
- <scope>test</scope>
- </dependency>
<dependency>
<groupId>org.ccil.cowan.tagsoup</groupId>
<artifactId>tagsoup</artifactId>
@@ -94,4 +86,4 @@
</plugins>
</build>
-</project>
+</project>
http://git-wip-us.apache.org/repos/asf/tika/blob/afb6cf26/tika-parsers/pom.xml
----------------------------------------------------------------------
diff --git a/tika-parsers/pom.xml b/tika-parsers/pom.xml
index d4602ac..76a78ac 100644
--- a/tika-parsers/pom.xml
+++ b/tika-parsers/pom.xml
@@ -53,15 +53,6 @@
<artifactId>tika-core</artifactId>
<version>${project.version}</version>
</dependency>
-
- <dependency>
- <groupId>${project.groupId}</groupId>
- <artifactId>tika-core</artifactId>
- <version>${project.version}</version>
- <type>test-jar</type>
- <scope>test</scope>
- </dependency>
-
<dependency>
<groupId>${project.groupId}</groupId>
<artifactId>tika-parser-multimedia-module</artifactId>
@@ -167,7 +158,6 @@
<artifactId>tika-test-resources</artifactId>
<version>${project.version}</version>
<scope>test</scope>
- <type>test-jar</type>
</dependency>
</dependencies>
http://git-wip-us.apache.org/repos/asf/tika/blob/afb6cf26/tika-parsers/src/test/java/org/apache/tika/parser/mock/MockParserTest.java
----------------------------------------------------------------------
diff --git a/tika-parsers/src/test/java/org/apache/tika/parser/mock/MockParserTest.java b/tika-parsers/src/test/java/org/apache/tika/parser/mock/MockParserTest.java
deleted file mode 100644
index 3d58b40..0000000
--- a/tika-parsers/src/test/java/org/apache/tika/parser/mock/MockParserTest.java
+++ /dev/null
@@ -1,247 +0,0 @@
-package org.apache.tika.parser.mock;
-
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import static java.nio.charset.StandardCharsets.UTF_8;
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertTrue;
-import static org.junit.Assert.fail;
-
-import java.io.ByteArrayOutputStream;
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.PrintStream;
-import java.util.Date;
-
-import org.apache.commons.io.IOUtils;
-import org.apache.tika.TikaTest;
-import org.apache.tika.exception.TikaException;
-import org.apache.tika.metadata.Metadata;
-import org.apache.tika.parser.AutoDetectParser;
-import org.apache.tika.parser.Parser;
-import org.junit.Test;
-
-public class MockParserTest extends TikaTest {
- private final static String M = "/test-documents/mock/";
- private final static Parser PARSER = new AutoDetectParser();
-
- @Override
- public XMLResult getXML(String path, Metadata m) throws Exception {
- //note that this is specific to MockParserTest with addition of M to the path!
- InputStream is = getResourceAsStream(M+path);
- try {
- return super.getXML(is, PARSER, m);
- } finally {
- IOUtils.closeQuietly(is);
- }
- }
-
- @Test
- public void testExample() throws Exception {
- Metadata m = new Metadata();
- PrintStream out = System.out;
- PrintStream err = System.err;
- ByteArrayOutputStream outBos = new ByteArrayOutputStream();
- ByteArrayOutputStream errBos = new ByteArrayOutputStream();
- PrintStream tmpOut = new PrintStream(outBos, true, UTF_8.toString());
- PrintStream tmpErr = new PrintStream(errBos, true, UTF_8.toString());
- System.setOut(tmpOut);
- System.setErr(tmpErr);
- try {
- assertThrowable("example.xml", m, IOException.class, "not another IOException");
- assertMockParser(m);
- } finally {
- System.setOut(out);
- System.setErr(err);
- }
- String outString = new String(outBos.toByteArray(), UTF_8);
- assertContains("writing to System.out", outString);
-
- String errString = new String(errBos.toByteArray(), UTF_8);
- assertContains("writing to System.err", errString);
-
- }
-
- @Test
- public void testNothingBad() throws Exception {
- Metadata m = new Metadata();
- String content = getXML("nothing_bad.xml", m).xml;
- assertEquals("Geoffrey Chaucer", m.get("author"));
- assertContains("<p>And bathed every veyne in swich licour,</p>", content);
- assertMockParser(m);
- }
-
- @Test
- public void testNullPointer() throws Exception {
- Metadata m = new Metadata();
- assertThrowable("null_pointer.xml", m, NullPointerException.class, "another null pointer exception");
- assertMockParser(m);
- }
-
- @Test
- public void testNullPointerNoMsg() throws Exception {
- Metadata m = new Metadata();
- assertThrowable("null_pointer_no_msg.xml", m, NullPointerException.class, null);
- assertMockParser(m);
- }
-
-
- @Test
- public void testSleep() throws Exception {
- long start = new Date().getTime();
- Metadata m = new Metadata();
- String content = getXML("sleep.xml", m).xml;
- assertMockParser(m);
- long elapsed = new Date().getTime()-start;
- //should sleep for at least 3000
- boolean enoughTimeHasElapsed = elapsed > 2000;
- assertTrue("not enough time has not elapsed: "+elapsed, enoughTimeHasElapsed);
- assertMockParser(m);
- }
-
- @Test
- public void testHeavyHang() throws Exception {
- long start = new Date().getTime();
- Metadata m = new Metadata();
-
- String content = getXML("heavy_hang.xml", m).xml;
- assertMockParser(m);
- long elapsed = new Date().getTime()-start;
- //should sleep for at least 3000
- boolean enoughTimeHasElapsed = elapsed > 2000;
- assertTrue("not enough time has elapsed: "+elapsed, enoughTimeHasElapsed);
- assertMockParser(m);
- }
-
- @Test
- public void testFakeOOM() throws Exception {
- Metadata m = new Metadata();
- assertThrowable("fake_oom.xml", m, OutOfMemoryError.class, "not another oom");
- assertMockParser(m);
- }
-
- @Test
- public void testRealOOM() throws Exception {
- //Note: we're not actually testing the diff between fake and real oom
- //i.e. by creating child process and setting different -Xmx or
- //memory profiling.
- Metadata m = new Metadata();
- assertThrowable("real_oom.xml", m, OutOfMemoryError.class, "Java heap space");
- assertMockParser(m);
- }
-
- @Test
- public void testInterruptibleSleep() {
- //Without static initialization of the parser, it can take ~1 second after t.start()
- //before the parser actually calls parse. This is
- //just the time it takes to instantiate and call AutoDetectParser, do the detection, etc.
- //This is not thread creation overhead.
- ParserRunnable r = new ParserRunnable("sleep_interruptible.xml");
- Thread t = new Thread(r);
- t.start();
- long start = new Date().getTime();
- try {
- Thread.sleep(1000);
- } catch (InterruptedException e) {
- //swallow
- }
-
- t.interrupt();
-
- try {
- t.join(10000);
- } catch (InterruptedException e) {
- //swallow
- }
- long elapsed = new Date().getTime()-start;
- boolean shortEnough = elapsed < 2000;//the xml file specifies 3000
- assertTrue("elapsed (" + elapsed + " millis) was not short enough", shortEnough);
- }
-
- @Test
- public void testNonInterruptibleSleep() {
- ParserRunnable r = new ParserRunnable("sleep_not_interruptible.xml");
- Thread t = new Thread(r);
- t.start();
- long start = new Date().getTime();
- try {
- //make sure that the thread has actually started
- Thread.sleep(1000);
- } catch (InterruptedException e) {
- //swallow
- }
- t.interrupt();
- try {
- t.join(20000);
- } catch (InterruptedException e) {
- //swallow
- }
- long elapsed = new Date().getTime()-start;
- boolean longEnough = elapsed > 3000;//the xml file specifies 3000, this sleeps 1000
- assertTrue("elapsed ("+elapsed+" millis) was not long enough", longEnough);
- }
-
- private class ParserRunnable implements Runnable {
- private final String path;
- ParserRunnable(String path) {
- this.path = path;
- }
- @Override
- public void run() {
- Metadata m = new Metadata();
- try {
- getXML(path, m);
- } catch (Exception e) {
- throw new RuntimeException(e);
- } finally {
- assertMockParser(m);
- }
- }
- }
-
- private void assertThrowable(String path, Metadata m, Class<? extends Throwable> expected, String message) {
-
- try {
- getXML(path, m);
- } catch (Throwable t) {
- //if this is a throwable wrapped in a TikaException, use the cause
- if (t instanceof TikaException && t.getCause() != null) {
- t = t.getCause();
- }
- if (! (t.getClass().isAssignableFrom(expected))){
- fail(t.getClass() +" is not assignable from "+expected);
- }
- if (message != null) {
- assertEquals(message, t.getMessage());
- }
- }
- }
-
- private void assertMockParser(Metadata m) {
- String[] parsers = m.getValues("X-Parsed-By");
- //make sure that it was actually parsed by mock.
- boolean parsedByMock = false;
- for (String parser : parsers) {
- if (parser.equals("org.apache.tika.parser.mock.MockParser")) {
- parsedByMock = true;
- break;
- }
- }
- assertTrue("mock parser should have been called", parsedByMock);
- }
-}
http://git-wip-us.apache.org/repos/asf/tika/blob/afb6cf26/tika-server/pom.xml
----------------------------------------------------------------------
diff --git a/tika-server/pom.xml b/tika-server/pom.xml
index ee0dbc8..6f267f6 100644
--- a/tika-server/pom.xml
+++ b/tika-server/pom.xml
@@ -114,13 +114,6 @@
</dependency>
<dependency>
<groupId>${project.groupId}</groupId>
- <artifactId>tika-core</artifactId>
- <version>${project.version}</version>
- <type>test-jar</type>
- <scope>test</scope>
- </dependency>
- <dependency>
- <groupId>${project.groupId}</groupId>
<artifactId>tika-parsers</artifactId>
<version>${project.version}</version>
<type>test-jar</type>
http://git-wip-us.apache.org/repos/asf/tika/blob/afb6cf26/tika-test-resources/pom.xml
----------------------------------------------------------------------
diff --git a/tika-test-resources/pom.xml b/tika-test-resources/pom.xml
index a7bbffc..e7bf3fe 100644
--- a/tika-test-resources/pom.xml
+++ b/tika-test-resources/pom.xml
@@ -1,7 +1,7 @@
<?xml version="1.0" encoding="UTF-8"?>
-<project xmlns="http://maven.apache.org/POM/4.0.0"
- xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
- xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>org.apache.tika</groupId>
@@ -16,15 +16,14 @@
<dependencies>
<dependency>
- <groupId>commons-io</groupId>
- <artifactId>commons-io</artifactId>
- <version>${commons.io.version}</version>
- </dependency>
- <dependency>
<groupId>${project.groupId}</groupId>
<artifactId>tika-core</artifactId>
<version>${project.version}</version>
</dependency>
+ <dependency>
+ <groupId>junit</groupId>
+ <artifactId>junit</artifactId>
+ </dependency>
</dependencies>
<profiles>
<profile>
@@ -72,7 +71,7 @@
<executions>
<execution>
<goals>
- <goal>test-jar</goal>
+ <goal>jar</goal>
</goals>
</execution>
</executions>
http://git-wip-us.apache.org/repos/asf/tika/blob/afb6cf26/tika-test-resources/src/main/java/org/apache/tika/TikaTest.java
----------------------------------------------------------------------
diff --git a/tika-test-resources/src/main/java/org/apache/tika/TikaTest.java b/tika-test-resources/src/main/java/org/apache/tika/TikaTest.java
new file mode 100644
index 0000000..2c6f21f
--- /dev/null
+++ b/tika-test-resources/src/main/java/org/apache/tika/TikaTest.java
@@ -0,0 +1,214 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika;
+
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+
+import java.io.ByteArrayOutputStream;
+import java.io.File;
+import java.io.IOException;
+import java.io.InputStream;
+import java.net.URISyntaxException;
+import java.net.URL;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
+import org.apache.tika.extractor.EmbeddedResourceHandler;
+import org.apache.tika.io.IOUtils;
+import org.apache.tika.io.TikaInputStream;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.mime.MediaType;
+import org.apache.tika.parser.AutoDetectParser;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.parser.Parser;
+import org.apache.tika.sax.BodyContentHandler;
+import org.apache.tika.sax.ToXMLContentHandler;
+import org.xml.sax.ContentHandler;
+
+/**
+ * Parent class of Tika tests
+ */
+public abstract class TikaTest {
+ /**
+ * This method will give you back the filename incl. the absolute path name
+ * to the resource. If the resource does not exist it will give you back the
+ * resource name incl. the path.
+ *
+ * @param name
+ * The named resource to search for.
+ * @return an absolute path incl. the name which is in the same directory as
+ * the class you've called it from.
+ */
+ public File getResourceAsFile(String name) throws URISyntaxException {
+ URL url = this.getClass().getResource(name);
+ if (url != null) {
+ return new File(url.toURI());
+ } else {
+ // We have a file which does not exist
+ // We got the path
+ url = this.getClass().getResource(".");
+ File file = new File(new File(url.toURI()), name);
+ if (file == null) {
+ fail("Unable to find requested file " + name);
+ }
+ return file;
+ }
+ }
+
+ public InputStream getResourceAsStream(String name) {
+ InputStream stream = this.getClass().getResourceAsStream(name);
+ if (stream == null) {
+ fail("Unable to find requested resource " + name);
+ }
+ return stream;
+ }
+
+ public static void assertContains(String needle, String haystack) {
+ assertTrue(needle + " not found in:\n" + haystack, haystack.contains(needle));
+ }
+ public static <T> void assertContains(T needle, Collection<? extends T> haystack) {
+ assertTrue(needle + " not found in:\n" + haystack, haystack.contains(needle));
+ }
+
+ public static void assertNotContained(String needle, String haystack) {
+ assertFalse(needle + " unexpectedly found in:\n" + haystack, haystack.contains(needle));
+ }
+ public static <T> void assertNotContained(T needle, Collection<? extends T> haystack) {
+ assertFalse(needle + " unexpectedly found in:\n" + haystack, haystack.contains(needle));
+ }
+
+ protected static class XMLResult {
+ public final String xml;
+ public final Metadata metadata;
+
+ public XMLResult(String xml, Metadata metadata) {
+ this.xml = xml;
+ this.metadata = metadata;
+ }
+ }
+
+ protected XMLResult getXML(String filePath, Parser parser, Metadata metadata) throws Exception {
+ return getXML(getResourceAsStream("/test-documents/" + filePath), parser, metadata);
+ }
+
+ protected XMLResult getXML(String filePath, Metadata metadata) throws Exception {
+ return getXML(getResourceAsStream("/test-documents/" + filePath), new AutoDetectParser(), metadata);
+ }
+
+ protected XMLResult getXML(String filePath) throws Exception {
+ return getXML(getResourceAsStream("/test-documents/" + filePath), new AutoDetectParser(), new Metadata());
+ }
+
+ protected XMLResult getXML(InputStream input, Parser parser, Metadata metadata) throws Exception {
+ ParseContext context = new ParseContext();
+ context.set(Parser.class, parser);
+
+ try {
+ ContentHandler handler = new ToXMLContentHandler();
+ parser.parse(input, handler, metadata, context);
+ return new XMLResult(handler.toString(), metadata);
+ } finally {
+ input.close();
+ }
+ }
+
+ /**
+ * Basic text extraction.
+ * <p>
+ * Tries to close input stream after processing.
+ */
+ public String getText(InputStream is, Parser parser, ParseContext context, Metadata metadata) throws Exception{
+ ContentHandler handler = new BodyContentHandler(1000000);
+ try {
+ parser.parse(is, handler, metadata, context);
+ } finally {
+ is.close();
+ }
+ return handler.toString();
+ }
+
+ public String getText(InputStream is, Parser parser, Metadata metadata) throws Exception{
+ return getText(is, parser, new ParseContext(), metadata);
+ }
+
+ public String getText(InputStream is, Parser parser, ParseContext context) throws Exception{
+ return getText(is, parser, context, new Metadata());
+ }
+
+ public String getText(InputStream is, Parser parser) throws Exception{
+ return getText(is, parser, new ParseContext(), new Metadata());
+ }
+
+ /**
+ * Keeps track of media types and file names recursively.
+ *
+ */
+ public static class TrackingHandler implements EmbeddedResourceHandler {
+ public List<String> filenames = new ArrayList<String>();
+ public List<MediaType> mediaTypes = new ArrayList<MediaType>();
+
+ private final Set<MediaType> skipTypes;
+
+ public TrackingHandler() {
+ skipTypes = new HashSet<MediaType>();
+ }
+
+ public TrackingHandler(Set<MediaType> skipTypes) {
+ this.skipTypes = skipTypes;
+ }
+
+ @Override
+ public void handle(String filename, MediaType mediaType,
+ InputStream stream) {
+ if (skipTypes.contains(mediaType)) {
+ return;
+ }
+ mediaTypes.add(mediaType);
+ filenames.add(filename);
+ }
+ }
+
+ /**
+ * Copies byte[] of embedded documents into a List.
+ */
+ public static class ByteCopyingHandler implements EmbeddedResourceHandler {
+
+ public List<byte[]> bytes = new ArrayList<byte[]>();
+
+ @Override
+ public void handle(String filename, MediaType mediaType,
+ InputStream stream) {
+ ByteArrayOutputStream os = new ByteArrayOutputStream();
+ if (! stream.markSupported()) {
+ stream = TikaInputStream.get(stream);
+ }
+ stream.mark(0);
+ try {
+ IOUtils.copy(stream, os);
+ bytes.add(os.toByteArray());
+ stream.reset();
+ } catch (IOException e) {
+ //swallow
+ }
+ }
+ }
+}
http://git-wip-us.apache.org/repos/asf/tika/blob/afb6cf26/tika-test-resources/src/main/java/org/apache/tika/config/AbstractTikaConfigTest.java
----------------------------------------------------------------------
diff --git a/tika-test-resources/src/main/java/org/apache/tika/config/AbstractTikaConfigTest.java b/tika-test-resources/src/main/java/org/apache/tika/config/AbstractTikaConfigTest.java
new file mode 100644
index 0000000..1b104f7
--- /dev/null
+++ b/tika-test-resources/src/main/java/org/apache/tika/config/AbstractTikaConfigTest.java
@@ -0,0 +1,50 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.config;
+
+import static org.junit.Assert.assertNotNull;
+
+import java.net.URL;
+
+import org.apache.tika.TikaTest;
+import org.apache.tika.parser.ParseContext;
+import org.junit.After;
+
+/**
+ * Parent of Junit test classes for {@link TikaConfig}, including
+ * Tika Core based ones, and ones in Tika Parsers that do things
+ * that tika-core's can't do, due to a need for the
+ * full set of "real" classes of parsers / detectors
+ */
+public abstract class AbstractTikaConfigTest extends TikaTest {
+ protected static ParseContext context = new ParseContext();
+
+ protected static String getConfigPath(String config) throws Exception {
+ URL url = TikaConfig.class.getResource(config);
+ assertNotNull("Test Tika Config not found: " + config, url);
+ return url.toExternalForm();
+ }
+ protected static TikaConfig getConfig(String config) throws Exception {
+ System.setProperty("tika.config", getConfigPath(config));
+ return new TikaConfig();
+ }
+
+ @After
+ public void resetConfig() {
+ System.clearProperty("tika.config");
+ }
+}