You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2020/12/16 16:46:15 UTC
[tika] branch main updated: TIKA-3180 modularize tika-server (#394)
This is an automated email from the ASF dual-hosted git repository.
tallison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git
The following commit(s) were added to refs/heads/main by this push:
new 435f0c9 TIKA-3180 modularize tika-server (#394)
435f0c9 is described below
commit 435f0c982a1fc5223236199b01cbb7207642e4dd
Author: Tim Allison <ta...@apache.org>
AuthorDate: Wed Dec 16 11:46:07 2020 -0500
TIKA-3180 modularize tika-server (#394)
* TIKA-3180 WIP -- DO NOT COMMIT -- initial pass at modularizing tika-server. Still need to add back in XMPWriter and Unpacker's ability to handle embedded file types that require POI as a dependency.
* TIKA-3180 -- fix licenses -- pass the RAT!
* TIKA-3180 -- modularize tika-server
---
CHANGES.txt | 3 +
tika-app/pom.xml | 2 +-
.../src/main/java/org/apache/tika/cli/TikaCLI.java | 34 +-
.../src/main/java/org/apache/tika/gui/TikaGUI.java | 2 +-
tika-bundles/pom.xml | 19 ++
tika-bundles/tika-bundle-classic/pom.xml | 34 +-
.../extractor/DefaultEmbeddedStreamTranslator.java | 87 +++++
.../tika/extractor/EmbeddedStreamTranslator.java | 24 +-
tika-eval/tika-eval-app/pom.xml | 18 +
tika-eval/tika-eval-core/pom.xml | 18 +
.../tika-parsers-classic-modules/pom.xml | 1 +
.../pom.xml | 24 +-
.../sax/boilerpipe}/BoilerpipeContentHandler.java | 2 +-
.../tika-parser-html-module/pom.xml | 11 +-
.../apache/tika/parser/html/HtmlParserTest.java | 3 +-
.../microsoft/MSEmbeddedStreamTranslator.java | 101 ++++++
.../microsoft/ooxml/AbstractOOXMLExtractor.java | 4 +
....apache.tika.extractor.EmbeddedStreamTranslator | 15 +
.../tika-parsers-classic-package/pom.xml | 7 +-
tika-server/README.md | 18 +
tika-server/pom.xml | 362 +--------------------
tika-server/src/test/resources/mock/fake_oom.xml | 24 --
.../src/test/resources/mock/heavy_hang_100.xml | 25 --
.../src/test/resources/mock/heavy_hang_30000.xml | 25 --
.../src/test/resources/mock/null_pointer.xml | 25 --
tika-server/src/test/resources/mock/real_oom.xml | 24 --
.../src/test/resources/mock/system_exit.xml | 25 --
.../test/resources/mock/testStaticStdOutErr.xml | 75 -----
.../src/test/resources/mock/testStdOutErr.xml | 75 -----
.../src/test/resources/mock/thread_interrupt.xml | 25 --
tika-server/{ => tika-server-classic}/assembly.xml | 2 +-
.../{ => tika-server-classic}/bin/init.d/tika | 0
.../bin/install_tika_service.sh | 0
tika-server/{ => tika-server-classic}/bin/tika | 0
.../{ => tika-server-classic}/bin/tika.in.sh | 0
tika-server/tika-server-classic/pom.xml | 245 ++++++++++++++
.../server/classic/config/PDFServerConfig.java | 52 +++
.../classic/config/TesseractServerConfig.java | 52 +++
.../classic/resource/XMPMetadataResource.java | 49 +++
.../classic}/writer/XMPMessageBodyWriter.java | 6 +-
.../org.apache.tika.server.core.ParseContextConfig | 16 +
...he.tika.server.core.resource.TikaServerResource | 15 +
...apache.tika.server.core.writer.TikaServerWriter | 15 +
.../tika/server/classic}/DetectorResourceTest.java | 28 +-
.../tika/server/classic}/MetadataResourceTest.java | 21 +-
.../classic}/RecursiveMetadataFilterTest.java | 19 +-
.../classic}/RecursiveMetadataResourceTest.java | 12 +-
.../tika/server/classic}/TikaDetectorsTest.java | 5 +-
.../tika/server/classic}/TikaMimeTypesTest.java | 65 +---
.../tika/server/classic}/TikaParsersTest.java | 6 +-
.../tika/server/classic}/TikaResourceTest.java | 126 +++----
.../tika/server/classic}/UnpackerResourceTest.java | 58 ++--
.../test/resources/config}/TIKA-3137-include.xml | 0
.../src/test/resources/log4j.properties | 2 +-
.../src/test/resources/test-documents}/2exe.docx | Bin
.../src/test/resources/test-documents}/2pic.doc | Bin
.../src/test/resources/test-documents}/2pic.docx | Bin
.../test-documents}/CDEC_WEATHER_2010_03_02 | 0
.../test/resources/test-documents}/Doc1_ole.doc | Bin
.../src/test/resources/test-documents}/foo.csv | 0
.../test/resources/test-documents}/password.xls | Bin
.../src/test/resources/test-documents}/pic.xls | Bin
.../src/test/resources/test-documents}/pic.xlsx | Bin
.../src/test/resources/test-documents}/test.doc | Bin
.../test/resources/test-documents}/testHTML.html | 0
.../src/test/resources/test-documents}/testOCR.pdf | Bin
.../test-documents}/testPDFTwoTextBoxes.pdf | Bin
.../test-documents}/testPassword4Spaces.pdf | Bin
.../testRTF_npeFromWMFInTikaServer.rtf | 0
.../test-documents}/testUnicodePassword.pdf | Bin
.../test-documents}/test_recursive_embedded.docx | Bin
.../pom.xml} | 28 +-
tika-server/{ => tika-server-core}/pom.xml | 71 ++--
.../server/core/CompositeParseContextConfig.java} | 28 +-
.../server/core}/DefaultInputStreamFactory.java | 2 +-
.../org/apache/tika/server/core}/HTMLHelper.java | 2 +-
.../tika/server/core}/InputStreamFactory.java | 2 +-
.../org/apache/tika/server/core}/MetadataList.java | 2 +-
.../tika/server/core/ParseContextConfig.java | 50 +++
.../org/apache/tika/server/core}/ServerStatus.java | 2 +-
.../tika/server/core}/ServerStatusWatcher.java | 2 +-
.../apache/tika/server/core}/ServerTimeouts.java | 2 +-
.../org/apache/tika/server/core}/TaskStatus.java | 4 +-
.../tika/server/core}/TikaLoggingFilter.java | 2 +-
.../apache/tika/server/core}/TikaServerCli.java | 65 ++--
.../server/core}/TikaServerParseException.java | 2 +-
.../core}/TikaServerParseExceptionMapper.java | 7 +-
.../tika/server/core}/TikaServerWatchDog.java | 4 +-
.../server/core}/URLEnabledInputStreamFactory.java | 2 +-
.../server/core/config/DocumentSelectorConfig.java | 48 +++
.../server/core/config/PasswordProviderConfig.java | 61 ++++
.../server/core}/resource/DetectorResource.java | 4 +-
.../server/core}/resource/LanguageResource.java | 2 +-
.../server/core}/resource/MetadataResource.java | 24 +-
.../core}/resource/RecursiveMetadataResource.java | 13 +-
.../tika/server/core}/resource/TikaDetectors.java | 4 +-
.../tika/server/core}/resource/TikaMimeTypes.java | 4 +-
.../tika/server/core}/resource/TikaParsers.java | 4 +-
.../tika/server/core}/resource/TikaResource.java | 108 ++----
.../server/core/resource/TikaServerResource.java | 4 +
.../server/core}/resource/TikaServerStatus.java | 4 +-
.../tika/server/core}/resource/TikaVersion.java | 2 +-
.../tika/server/core}/resource/TikaWelcome.java | 6 +-
.../server/core}/resource/TranslateResource.java | 6 +-
.../server/core}/resource/UnpackerResource.java | 72 +---
.../server/core}/writer/CSVMessageBodyWriter.java | 2 +-
.../server/core}/writer/JSONMessageBodyWriter.java | 2 +-
.../tika/server/core}/writer/JSONObjWriter.java | 4 +-
.../writer/MetadataListMessageBodyWriter.java | 4 +-
.../apache/tika/server/core}/writer/TarWriter.java | 2 +-
.../server/core}/writer/TextMessageBodyWriter.java | 2 +-
.../tika/server/core/writer/TikaServerWriter.java | 10 +
.../apache/tika/server/core}/writer/ZipWriter.java | 2 +-
.../org.apache.tika.server.core.ParseContextConfig | 16 +
.../src/main/resources/log4j.properties | 0
.../src/main/resources/tikaserver-template.html | 0
.../main/resources/tikaserver-version.properties | 0
.../org/apache/tika/server/core}/CXFTestBase.java | 15 +-
.../tika/server/core}/LanguageResourceTest.java | 13 +-
.../tika/server/core}/NullWebClientLogger.java | 3 +-
.../apache/tika/server/core}/ServerStatusTest.java | 4 +-
.../tika/server/core}/StackTraceOffTest.java | 39 ++-
.../apache/tika/server/core}/StackTraceTest.java | 41 +--
.../tika/server/core}/TikaMimeTypesTest.java | 38 +--
.../apache/tika/server/core/TikaResourceTest.java | 111 +++++++
.../server/core}/TikaServerIntegrationTest.java | 35 +-
.../tika/server/core}/TikaServerStatusTest.java | 12 +-
.../apache/tika/server/core}/TikaVersionTest.java | 5 +-
.../apache/tika/server/core}/TikaWelcomeTest.java | 13 +-
.../tika/server/core}/TranslateResourceTest.java | 11 +-
.../config}/tika-config-for-server-tests.xml | 0
.../src/test/resources/log4j.properties | 2 +-
.../src/test/resources/logging/log4j_child.xml | 4 +-
.../src/test/resources/test-documents}/english.txt | 0
.../src/test/resources/test-documents}/french.txt | 0
.../resources/test-documents}/testDigilite.fdf | 0
136 files changed, 1547 insertions(+), 1392 deletions(-)
diff --git a/CHANGES.txt b/CHANGES.txt
index 49a7ccc..0d61915 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -11,6 +11,9 @@ Release 2.0.0 - ???
* General code cleanup (PeterAlfredLee)
+ * tika-server's /metadata endpoint requires tika-server-classic to write XMP/rdf output.
+ This output is not available in tika-server-core.
+
Other changes
Release 1.26 - ???
diff --git a/tika-app/pom.xml b/tika-app/pom.xml
index 461c45d..14525ef 100644
--- a/tika-app/pom.xml
+++ b/tika-app/pom.xml
@@ -112,7 +112,7 @@
</createDependencyReducedPom>
<artifactSet>
<excludes>
- <exclude>org.apache.tika:tika-parsers:jar:</exclude>
+ <exclude>org.apache.tika:tika-parsers-classic-package:jar:</exclude>
</excludes>
</artifactSet>
<filters>
diff --git a/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java b/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java
index 7666dc9..b246207 100644
--- a/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java
+++ b/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java
@@ -74,7 +74,9 @@ import org.apache.tika.config.TikaConfigSerializer;
import org.apache.tika.detect.CompositeDetector;
import org.apache.tika.detect.Detector;
import org.apache.tika.exception.TikaException;
+import org.apache.tika.extractor.DefaultEmbeddedStreamTranslator;
import org.apache.tika.extractor.EmbeddedDocumentExtractor;
+import org.apache.tika.extractor.EmbeddedStreamTranslator;
import org.apache.tika.fork.ForkParser;
import org.apache.tika.gui.TikaGUI;
import org.apache.tika.io.TikaInputStream;
@@ -98,7 +100,6 @@ import org.apache.tika.parser.ParserDecorator;
import org.apache.tika.parser.PasswordProvider;
import org.apache.tika.parser.RecursiveParserWrapper;
import org.apache.tika.parser.digestutils.CommonsDigester;
-import org.apache.tika.parser.html.BoilerpipeContentHandler;
import org.apache.tika.parser.pdf.PDFParserConfig;
import org.apache.tika.sax.BasicContentHandlerFactory;
import org.apache.tika.sax.BodyContentHandler;
@@ -106,6 +107,7 @@ import org.apache.tika.sax.ContentHandlerFactory;
import org.apache.tika.sax.ExpandedTitleContentHandler;
import org.apache.tika.sax.RecursiveParserWrapperHandler;
import org.apache.tika.sax.WriteOutContentHandler;
+import org.apache.tika.sax.boilerpipe.BoilerpipeContentHandler;
import org.apache.tika.xmp.XMPMetadata;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -1041,6 +1043,7 @@ public class TikaCLI {
private int count = 0;
private final TikaConfig config = TikaConfig.getDefaultConfig();
+ private final EmbeddedStreamTranslator embeddedStreamTranslator = new DefaultEmbeddedStreamTranslator();
public boolean shouldParseEmbedded(Metadata metadata) {
return true;
@@ -1070,15 +1073,9 @@ public class TikaCLI {
System.out.println("Extracting '"+name+"' ("+contentType+") to " + outputFile);
try (FileOutputStream os = new FileOutputStream(outputFile)) {
- if (inputStream instanceof TikaInputStream) {
- TikaInputStream tin = (TikaInputStream) inputStream;
-
- if (tin.getOpenContainer() != null && tin.getOpenContainer() instanceof DirectoryEntry) {
- POIFSFileSystem fs = new POIFSFileSystem();
- copy((DirectoryEntry) tin.getOpenContainer(), fs.getRoot());
- fs.writeFilesystem(os);
- } else {
- IOUtils.copy(inputStream, os);
+ if (embeddedStreamTranslator.shouldTranslate(inputStream, metadata)) {
+ try (InputStream translated = embeddedStreamTranslator.translate(inputStream, metadata)) {
+ IOUtils.copy(translated, os);
}
} else {
IOUtils.copy(inputStream, os);
@@ -1148,23 +1145,6 @@ public class TikaCLI {
return ".bin";
}
-
- protected void copy(DirectoryEntry sourceDir, DirectoryEntry destDir)
- throws IOException {
- for (org.apache.poi.poifs.filesystem.Entry entry : sourceDir) {
- if (entry instanceof DirectoryEntry) {
- // Need to recurse
- DirectoryEntry newDir = destDir.createDirectory(entry.getName());
- copy((DirectoryEntry) entry, newDir);
- } else {
- // Copy entry
- try (InputStream contents =
- new DocumentInputStream((DocumentEntry) entry)) {
- destDir.createDocument(entry.getName(), contents);
- }
- }
- }
- }
}
private class NoDocumentMetHandler extends DefaultHandler {
diff --git a/tika-app/src/main/java/org/apache/tika/gui/TikaGUI.java b/tika-app/src/main/java/org/apache/tika/gui/TikaGUI.java
index a6dd019..50b3c22 100644
--- a/tika-app/src/main/java/org/apache/tika/gui/TikaGUI.java
+++ b/tika-app/src/main/java/org/apache/tika/gui/TikaGUI.java
@@ -77,7 +77,7 @@ import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
import org.apache.tika.parser.RecursiveParserWrapper;
import org.apache.tika.parser.digestutils.CommonsDigester;
-import org.apache.tika.parser.html.BoilerpipeContentHandler;
+import org.apache.tika.sax.boilerpipe.BoilerpipeContentHandler;
import org.apache.tika.sax.BasicContentHandlerFactory;
import org.apache.tika.sax.BodyContentHandler;
import org.apache.tika.sax.ContentHandlerDecorator;
diff --git a/tika-bundles/pom.xml b/tika-bundles/pom.xml
index 915f2c1..567e93f 100644
--- a/tika-bundles/pom.xml
+++ b/tika-bundles/pom.xml
@@ -1,4 +1,22 @@
<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+-->
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
@@ -18,4 +36,5 @@
<modules>
<module>tika-bundle-classic</module>
</modules>
+
</project>
\ No newline at end of file
diff --git a/tika-bundles/tika-bundle-classic/pom.xml b/tika-bundles/tika-bundle-classic/pom.xml
index 47f0273..b3cfb3a 100644
--- a/tika-bundles/tika-bundle-classic/pom.xml
+++ b/tika-bundles/tika-bundle-classic/pom.xml
@@ -1,21 +1,22 @@
<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
- <!--
- Licenseadsfd to the Apache Software Foundation (ASF) under one
- or more contributor license agreements. See the NOTICE file
- distributed with this work for additional information
- regarding copyright ownership. The ASF licenses this file
- to you under the Apache License, Version 2.0 (the
- "License"); you may not use this file except in compliance
- with the License. You may obtain a copy of the License at
- http://www.apache.org/licenses/LICENSE-2.0
- Unless required by applicable law or agreed to in writing,
- software distributed under the License is distributed on an
- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- KIND, either express or implied. See the License for the
- specific language governing permissions and limitations
- under the License.
- -->
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+-->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
<modelVersion>4.0.0</modelVersion>
@@ -474,6 +475,7 @@
<configuration>
<excludes>
<exclude>src/main/resources/META-INF/MANIFEST.MF</exclude>
+ <exclude>dependency-reduced-pom.xml</exclude>
</excludes>
</configuration>
</plugin>
diff --git a/tika-core/src/main/java/org/apache/tika/extractor/DefaultEmbeddedStreamTranslator.java b/tika-core/src/main/java/org/apache/tika/extractor/DefaultEmbeddedStreamTranslator.java
new file mode 100644
index 0000000..86af6c1
--- /dev/null
+++ b/tika-core/src/main/java/org/apache/tika/extractor/DefaultEmbeddedStreamTranslator.java
@@ -0,0 +1,87 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.extractor;
+
+import org.apache.tika.config.ServiceLoader;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.utils.ServiceLoaderUtils;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.List;
+
+/**
+ * Loads EmbeddedStreamTranslators via service loading. Tries to run each
+ * in turn and returns the first non-null value. If no translation has occurred,
+ * this returns the original InputStream. If a translation has occurred, the
+ * translator will consume the InputStream but not close it.
+ */
+public class DefaultEmbeddedStreamTranslator implements EmbeddedStreamTranslator {
+
+ final List<EmbeddedStreamTranslator> translators;
+
+ private static List<EmbeddedStreamTranslator> getDefaultFilters(ServiceLoader loader) {
+ List<EmbeddedStreamTranslator> embeddedStreamTranslators
+ = loader.loadServiceProviders(EmbeddedStreamTranslator.class);
+ ServiceLoaderUtils.sortLoadedClasses(embeddedStreamTranslators);
+ return embeddedStreamTranslators;
+ }
+
+ public DefaultEmbeddedStreamTranslator() {
+ this(getDefaultFilters(new ServiceLoader()));
+ }
+
+ private DefaultEmbeddedStreamTranslator(List<EmbeddedStreamTranslator> translators) {
+ this.translators = translators;
+ }
+
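+ /**
+ * This should sniff the stream to determine if it needs to be translated.
+ * The translator is responsible for resetting the stream if any bytes have been read.
+ * @param inputStream the embedded stream to check
+ * @param metadata the metadata associated with the embedded stream
+ * @return true if any of the loaded translators reports that the stream should be translated
+ * @throws IOException if a translator fails while checking the stream
+ */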
+ @Override
+ public boolean shouldTranslate(InputStream inputStream, Metadata metadata) throws IOException {
+ for (EmbeddedStreamTranslator translator : translators) {
+ if (translator.shouldTranslate(inputStream, metadata)) {
+ return true;
+ }
+ }
+ return false;
+ }
+
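+ /**
+ * This will consume the InputStream and return a new stream of translated bytes.
+ * @param inputStream the embedded stream to translate
+ * @param metadata the metadata associated with the embedded stream
+ * @return the first non-null stream returned by a loaded translator, or the original inputStream if none translated it
+ * @throws IOException if a translator fails while translating the stream
+ */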
+ @Override
+ public InputStream translate(InputStream inputStream, Metadata metadata) throws IOException {
+ for (EmbeddedStreamTranslator translator : translators) {
+ InputStream translated = translator.translate(inputStream, metadata);
+ if (translated != null) {
+ return translated;
+ }
+ }
+ return inputStream;
+ }
+}
diff --git a/tika-server/src/main/java/org/apache/tika/server/DefaultInputStreamFactory.java b/tika-core/src/main/java/org/apache/tika/extractor/EmbeddedStreamTranslator.java
similarity index 65%
copy from tika-server/src/main/java/org/apache/tika/server/DefaultInputStreamFactory.java
copy to tika-core/src/main/java/org/apache/tika/extractor/EmbeddedStreamTranslator.java
index f1d6aa6..c6387fe 100644
--- a/tika-server/src/main/java/org/apache/tika/server/DefaultInputStreamFactory.java
+++ b/tika-core/src/main/java/org/apache/tika/extractor/EmbeddedStreamTranslator.java
@@ -14,27 +14,25 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
-package org.apache.tika.server;
+package org.apache.tika.extractor;
import org.apache.tika.metadata.Metadata;
-import javax.ws.rs.core.HttpHeaders;
import java.io.IOException;
import java.io.InputStream;
/**
- * Passthrough -- returns InputStream as is
+ * Interface for translating embedded streams.
+ * Specifically, this is used to unravel OLE streams in tika-server's unpack
+ * and/or to handle open containers in a TikaInputStream.
+ *
+ * @since Apache Tika 2.0.0
*/
-public class DefaultInputStreamFactory implements InputStreamFactory {
+public interface EmbeddedStreamTranslator {
+
+ boolean shouldTranslate(InputStream inputStream, Metadata metadata) throws IOException;
- @Override
- public InputStream getInputSteam(InputStream is, HttpHeaders httpHeaders) throws IOException {
- return is;
- }
+ InputStream translate(InputStream inputStream,
+ Metadata metadata) throws IOException;
- @Override
- public InputStream getInputSteam(InputStream is, Metadata metadata, HttpHeaders httpHeaders) throws IOException {
- return is;
- }
}
diff --git a/tika-eval/tika-eval-app/pom.xml b/tika-eval/tika-eval-app/pom.xml
index 6490986..c8377f2 100644
--- a/tika-eval/tika-eval-app/pom.xml
+++ b/tika-eval/tika-eval-app/pom.xml
@@ -1,4 +1,22 @@
<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+-->
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
diff --git a/tika-eval/tika-eval-core/pom.xml b/tika-eval/tika-eval-core/pom.xml
index 77152e8..8154889 100644
--- a/tika-eval/tika-eval-core/pom.xml
+++ b/tika-eval/tika-eval-core/pom.xml
@@ -1,4 +1,22 @@
<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+-->
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
diff --git a/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/pom.xml b/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/pom.xml
index 52d67b1..ab76ef2 100644
--- a/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/pom.xml
+++ b/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/pom.xml
@@ -58,6 +58,7 @@
</dependency>
</dependencies>
<modules>
+ <module>tika-parser-html-commons</module>
<module>tika-parser-jdbc-commons</module>
<module>tika-parser-digest-commons</module>
<module>tika-parser-mail-commons</module>
diff --git a/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-html-module/pom.xml b/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-html-commons/pom.xml
similarity index 72%
copy from tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-html-module/pom.xml
copy to tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-html-commons/pom.xml
index fa647a3..98c99b4 100644
--- a/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-html-module/pom.xml
+++ b/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-html-commons/pom.xml
@@ -24,37 +24,17 @@
<artifactId>tika-parsers-classic-modules</artifactId>
<groupId>org.apache.tika</groupId>
<version>2.0.0-SNAPSHOT</version>
- <relativePath>../pom.xml</relativePath>
</parent>
<modelVersion>4.0.0</modelVersion>
- <artifactId>tika-parser-html-module</artifactId>
+ <artifactId>tika-parser-html-commons</artifactId>
<dependencies>
<dependency>
- <groupId>org.ccil.cowan.tagsoup</groupId>
- <artifactId>tagsoup</artifactId>
- <version>${tagsoup.version}</version>
- </dependency>
- <dependency>
<groupId>de.l3s.boilerpipe</groupId>
<artifactId>boilerpipe</artifactId>
<version>${boilerpipe.version}</version>
</dependency>
- <dependency>
- <groupId>commons-codec</groupId>
- <artifactId>commons-codec</artifactId>
- <version>${commons.codec.version}</version>
- </dependency>
-
- <!-- test scope -->
- <!-- this is required for basic encoding detection -->
- <dependency>
- <groupId>${project.groupId}</groupId>
- <artifactId>tika-parser-text-module</artifactId>
- <version>${project.version}</version>
- <scope>test</scope>
- </dependency>
</dependencies>
<build>
<plugins>
@@ -64,7 +44,7 @@
<configuration>
<archive>
<manifestEntries>
- <Automatic-Module-Name>org.apache.tika.parser.html</Automatic-Module-Name>
+ <Automatic-Module-Name>org.apache.tika.sax.boilerpipe</Automatic-Module-Name>
</manifestEntries>
</archive>
</configuration>
diff --git a/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-html-module/src/main/java/org/apache/tika/parser/html/BoilerpipeContentHandler.java b/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-html-commons/src/main/java/org/apache/tika/sax/boilerpipe/BoilerpipeContentHandler.java
similarity index 99%
rename from tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-html-module/src/main/java/org/apache/tika/parser/html/BoilerpipeContentHandler.java
rename to tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-html-commons/src/main/java/org/apache/tika/sax/boilerpipe/BoilerpipeContentHandler.java
index 19a29b9..b8cca63 100644
--- a/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-html-module/src/main/java/org/apache/tika/parser/html/BoilerpipeContentHandler.java
+++ b/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-html-commons/src/main/java/org/apache/tika/sax/boilerpipe/BoilerpipeContentHandler.java
@@ -14,7 +14,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.tika.parser.html;
+package org.apache.tika.sax.boilerpipe;
import java.io.Writer;
import java.util.ArrayList;
diff --git a/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-html-module/pom.xml b/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-html-module/pom.xml
index fa647a3..807ef6e 100644
--- a/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-html-module/pom.xml
+++ b/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-html-module/pom.xml
@@ -32,15 +32,16 @@
<dependencies>
<dependency>
+ <groupId>${project.groupId}</groupId>
+ <artifactId>tika-parser-html-commons</artifactId>
+ <version>${project.version}</version>
+ </dependency>
+ <dependency>
<groupId>org.ccil.cowan.tagsoup</groupId>
<artifactId>tagsoup</artifactId>
<version>${tagsoup.version}</version>
</dependency>
- <dependency>
- <groupId>de.l3s.boilerpipe</groupId>
- <artifactId>boilerpipe</artifactId>
- <version>${boilerpipe.version}</version>
- </dependency>
+
<dependency>
<groupId>commons-codec</groupId>
<artifactId>commons-codec</artifactId>
diff --git a/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-html-module/src/test/java/org/apache/tika/parser/html/HtmlParserTest.java b/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-html-module/src/test/java/org/apache/tika/parser/html/HtmlParserTest.java
index 3b26443..05aea75 100644
--- a/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-html-module/src/test/java/org/apache/tika/parser/html/HtmlParserTest.java
+++ b/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-html-module/src/test/java/org/apache/tika/parser/html/HtmlParserTest.java
@@ -39,7 +39,6 @@ import java.nio.charset.StandardCharsets;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
-import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
@@ -64,6 +63,7 @@ import org.apache.tika.io.TikaInputStream;
import org.apache.tika.metadata.Geographic;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.TikaCoreProperties;
+
import org.apache.tika.parser.AutoDetectParser;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
@@ -73,6 +73,7 @@ import org.apache.tika.sax.BodyContentHandler;
import org.apache.tika.sax.LinkContentHandler;
import org.apache.tika.sax.RecursiveParserWrapperHandler;
import org.apache.tika.sax.TeeContentHandler;
+import org.apache.tika.sax.boilerpipe.BoilerpipeContentHandler;
import org.ccil.cowan.tagsoup.HTMLSchema;
import org.ccil.cowan.tagsoup.Schema;
import org.junit.Ignore;
diff --git a/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/extractor/microsoft/MSEmbeddedStreamTranslator.java b/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/extractor/microsoft/MSEmbeddedStreamTranslator.java
new file mode 100644
index 0000000..f8d157d
--- /dev/null
+++ b/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/extractor/microsoft/MSEmbeddedStreamTranslator.java
@@ -0,0 +1,169 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.extractor.microsoft;
+
+import org.apache.poi.poifs.filesystem.DirectoryEntry;
+import org.apache.poi.poifs.filesystem.DocumentEntry;
+import org.apache.poi.poifs.filesystem.DocumentInputStream;
+import org.apache.poi.poifs.filesystem.Entry;
+import org.apache.poi.poifs.filesystem.Ole10Native;
+import org.apache.poi.poifs.filesystem.Ole10NativeException;
+import org.apache.poi.poifs.filesystem.POIFSFileSystem;
+import org.apache.poi.util.IOUtils;
+import org.apache.tika.extractor.EmbeddedStreamTranslator;
+import org.apache.tika.io.TikaInputStream;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.TikaCoreProperties;
+import org.apache.tika.parser.microsoft.OfficeParser;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+
+/**
+ * {@link EmbeddedStreamTranslator} for Microsoft embedded objects. It handles
+ * streams whose content type is the OOXML OLE-object type, and
+ * {@link TikaInputStream}s whose open container is a POIFS {@link DirectoryEntry}.
+ */
+public class MSEmbeddedStreamTranslator implements EmbeddedStreamTranslator {
+
+    private static final Logger LOG = LoggerFactory.getLogger(MSEmbeddedStreamTranslator.class);
+
+    /** Content type that marks an embedded stream as an OOXML OLE object. */
+    private static final String OLE_OBJECT_CONTENT_TYPE =
+            "application/vnd.openxmlformats-officedocument.oleObject";
+
+    /**
+     * Reports whether {@link #translate(InputStream, Metadata)} would rewrite the stream.
+     *
+     * @param inputStream embedded stream; not read by this method
+     * @param metadata metadata of the embedded stream; its content type is consulted
+     * @return {@code true} for an OLE-object content type or an open POIFS directory
+     */
+    @Override
+    public boolean shouldTranslate(InputStream inputStream, Metadata metadata) throws IOException {
+        return isOleObject(metadata) || getOpenDirectory(inputStream) != null;
+    }
+
+    /**
+     * Converts an embedded Microsoft stream into bytes that can be stored on their own.
+     * <p>
+     * An OLE object is buffered in memory; for an OLE10 native payload the wrapped data
+     * and its label replace the stream and the resource name, otherwise the detected
+     * type's extension is appended to the resource name. An open POIFS directory is
+     * copied into a new file system and serialized. Anything else is returned as is.
+     *
+     * @param inputStream embedded stream to translate
+     * @param metadata metadata of the embedded stream; the resource name may be updated
+     * @return the translated stream, or {@code inputStream} if no translation applies
+     * @throws IOException if the stream cannot be read or rewritten
+     */
+    @Override
+    public InputStream translate(InputStream inputStream, Metadata metadata) throws IOException {
+        if (isOleObject(metadata)) {
+            return translateOleObject(inputStream, metadata);
+        }
+        DirectoryEntry openDirectory = getOpenDirectory(inputStream);
+        if (openDirectory != null) {
+            return serialize(openDirectory);
+        }
+        return inputStream;
+    }
+
+    /**
+     * Recursively copies every entry of {@code sourceDir} into {@code destDir}.
+     *
+     * @param sourceDir directory to read entries from
+     * @param destDir directory that receives the copies
+     * @throws IOException if an entry cannot be read or created
+     */
+    protected void copy(DirectoryEntry sourceDir, DirectoryEntry destDir)
+            throws IOException {
+        for (Entry entry : sourceDir) {
+            if (entry instanceof DirectoryEntry) {
+                // Need to recurse
+                DirectoryEntry newDir = destDir.createDirectory(entry.getName());
+                copy((DirectoryEntry) entry, newDir);
+            } else {
+                // Copy entry
+                try (InputStream contents = new DocumentInputStream((DocumentEntry) entry)) {
+                    destDir.createDocument(entry.getName(), contents);
+                }
+            }
+        }
+    }
+
+    /** Tells whether the metadata declares the OOXML OLE-object content type. */
+    private static boolean isOleObject(Metadata metadata) {
+        return OLE_OBJECT_CONTENT_TYPE.equals(
+                metadata.get(org.apache.tika.metadata.HttpHeaders.CONTENT_TYPE));
+    }
+
+    /** Returns the stream's open container if it is a POIFS directory, else {@code null}. */
+    private static DirectoryEntry getOpenDirectory(InputStream inputStream) {
+        if (inputStream instanceof TikaInputStream) {
+            Object container = ((TikaInputStream) inputStream).getOpenContainer();
+            if (container instanceof DirectoryEntry) {
+                return (DirectoryEntry) container;
+            }
+        }
+        return null;
+    }
+
+    /** Buffers an OLE object, unwraps OLE10 native data and updates the resource name. */
+    private InputStream translateOleObject(InputStream inputStream, Metadata metadata)
+            throws IOException {
+        ByteArrayOutputStream bos = new ByteArrayOutputStream();
+        IOUtils.copy(inputStream, bos);
+        byte[] data = bos.toByteArray();
+        String name = metadata.get(TikaCoreProperties.RESOURCE_NAME_KEY);
+
+        // Close the in-memory file system once the type and payload have been extracted.
+        try (POIFSFileSystem poifs = new POIFSFileSystem(new ByteArrayInputStream(data))) {
+            OfficeParser.POIFSDocumentType type = OfficeParser.POIFSDocumentType.detectType(poifs);
+            if (type == OfficeParser.POIFSDocumentType.OLE10_NATIVE) {
+                try {
+                    Ole10Native ole = Ole10Native.createFromEmbeddedOleObject(poifs);
+                    if (ole.getDataSize() > 0) {
+                        name = ole.getLabel();
+                        data = ole.getDataBuffer();
+                    }
+                } catch (Ole10NativeException ex) {
+                    LOG.warn("Skipping invalid part", ex);
+                }
+            } else if (name != null) {
+                // Without this guard a missing resource name would become "null.<ext>".
+                name += '.' + type.getExtension();
+            }
+        }
+        metadata.set(TikaCoreProperties.RESOURCE_NAME_KEY, name);
+        return new ByteArrayInputStream(data);
+    }
+
+    /** Copies the directory tree into a new POIFS file system and returns its bytes. */
+    private InputStream serialize(DirectoryEntry sourceRoot) throws IOException {
+        ByteArrayOutputStream bos = new ByteArrayOutputStream();
+        try (POIFSFileSystem fs = new POIFSFileSystem()) {
+            copy(sourceRoot, fs.getRoot());
+            fs.writeFilesystem(bos);
+        }
+        return new ByteArrayInputStream(bos.toByteArray());
+    }
+}
diff --git a/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/AbstractOOXMLExtractor.java b/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/AbstractOOXMLExtractor.java
index 4ecebfb..6ab763c 100644
--- a/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/AbstractOOXMLExtractor.java
+++ b/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/AbstractOOXMLExtractor.java
@@ -31,6 +31,7 @@ import java.util.Set;
import org.apache.poi.extractor.POITextExtractor;
import org.apache.poi.ooxml.POIXMLDocument;
+import org.apache.poi.ooxml.extractor.ExtractorFactory;
import org.apache.poi.ooxml.extractor.POIXMLTextExtractor;
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.openxml4j.opc.OPCPackage;
@@ -77,6 +78,9 @@ import org.xml.sax.helpers.AttributesImpl;
public abstract class AbstractOOXMLExtractor implements OOXMLExtractor {
+ static {
+ ExtractorFactory.setAllThreadsPreferEventExtractors(true);
+ }
static final String RELATION_AUDIO = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/audio";
static final String RELATION_MEDIA = "http://schemas.microsoft.com/office/2007/relationships/media";
diff --git a/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-microsoft-module/src/main/resources/META-INF/services/org.apache.tika.extractor.EmbeddedStreamTranslator b/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-microsoft-module/src/main/resources/META-INF/services/org.apache.tika.extractor.EmbeddedStreamTranslator
new file mode 100644
index 0000000..e59cba8
--- /dev/null
+++ b/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-microsoft-module/src/main/resources/META-INF/services/org.apache.tika.extractor.EmbeddedStreamTranslator
@@ -0,0 +1,15 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+org.apache.tika.extractor.microsoft.MSEmbeddedStreamTranslator
\ No newline at end of file
diff --git a/tika-parsers/tika-parsers-classic/tika-parsers-classic-package/pom.xml b/tika-parsers/tika-parsers-classic/tika-parsers-classic-package/pom.xml
index 19f94a4..aaa582f 100644
--- a/tika-parsers/tika-parsers-classic/tika-parsers-classic-package/pom.xml
+++ b/tika-parsers/tika-parsers-classic/tika-parsers-classic-package/pom.xml
@@ -139,6 +139,11 @@
<artifactId>tika-parser-xml-module</artifactId>
<version>${project.version}</version>
</dependency>
+ <dependency>
+ <groupId>${project.groupId}</groupId>
+ <artifactId>tika-parser-xmp-commons</artifactId>
+ <version>${project.version}</version>
+ </dependency>
<!-- externally maintained parsers -->
<dependency>
@@ -341,7 +346,7 @@
<version>${rat.version}</version>
<configuration>
<excludes>
- <exclude>src/test/resources/test-data/**</exclude>
+ <exclude>src/test/resources/test-documents/**</exclude>
</excludes>
</configuration>
</plugin>
diff --git a/tika-server/README.md b/tika-server/README.md
index e683bef..e0dbb38 100644
--- a/tika-server/README.md
+++ b/tika-server/README.md
@@ -1,3 +1,21 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+-->
# Apache Tika Server
https://cwiki.apache.org/confluence/display/TIKA/TikaJAXRS
diff --git a/tika-server/pom.xml b/tika-server/pom.xml
index 8aeb14a..04bb142 100644
--- a/tika-server/pom.xml
+++ b/tika-server/pom.xml
@@ -17,6 +17,12 @@
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
<modelVersion>4.0.0</modelVersion>
+ <packaging>pom</packaging>
+ <modules>
+ <module>tika-server-core</module>
+ <module>tika-server-classic</module>
+ <module>tika-server-client</module>
+ </modules>
<parent>
<groupId>org.apache.tika</groupId>
@@ -29,363 +35,7 @@
<name>Apache Tika server</name>
<url>http://tika.apache.org/</url>
- <properties>
- </properties>
- <pluginRepositories>
- <pluginRepository>
- <id>miredot</id>
- <name>MireDot Releases</name>
- <url>http://nexus.qmino.com/content/repositories/miredot</url>
- </pluginRepository>
- </pluginRepositories>
-
- <dependencies>
- <dependency>
- <groupId>${project.groupId}</groupId>
- <artifactId>tika-parsers-classic-package</artifactId>
- <version>${project.version}</version>
- </dependency>
- <dependency>
- <groupId>${project.groupId}</groupId>
- <artifactId>tika-translate</artifactId>
- <version>${project.version}</version>
- </dependency>
-
- <dependency>
- <groupId>${project.groupId}</groupId>
- <artifactId>tika-langdetect-optimaize</artifactId>
- <version>${project.version}</version>
- </dependency>
- <dependency>
- <groupId>${project.groupId}</groupId>
- <artifactId>tika-serialization</artifactId>
- <version>${project.version}</version>
- </dependency>
- <dependency>
- <groupId>${project.groupId}</groupId>
- <artifactId>tika-xmp</artifactId>
- <version>${project.version}</version>
- </dependency>
- <dependency>
- <groupId>net.sf.opencsv</groupId>
- <artifactId>opencsv</artifactId>
- <version>${opencsv.version}</version>
- </dependency>
- <dependency>
- <groupId>org.apache.cxf</groupId>
- <artifactId>cxf-rt-frontend-jaxrs</artifactId>
- <version>${cxf.version}</version>
- <exclusions>
- <exclusion>
- <groupId>javax.annotation</groupId>
- <artifactId>javax.annotation-api</artifactId>
- </exclusion>
- <exclusion>
- <groupId>org.glassfish.jaxb</groupId>
- <artifactId>jaxb-runtime</artifactId>
- </exclusion>
- <exclusion>
- <groupId>jakarta.xml.bind</groupId>
- <artifactId>jakarta.xml.bind-api</artifactId>
- </exclusion>
- </exclusions>
- </dependency>
- <dependency>
- <groupId>org.apache.cxf</groupId>
- <artifactId>cxf-rt-rs-service-description</artifactId>
- <version>${cxf.version}</version>
- <scope>test</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.cxf</groupId>
- <artifactId>cxf-rt-transports-http-jetty</artifactId>
- <version>${cxf.version}</version>
- </dependency>
- <dependency>
- <groupId>org.apache.cxf</groupId>
- <artifactId>cxf-rt-rs-security-cors</artifactId>
- <version>${cxf.version}</version>
- </dependency>
-
- <dependency>
- <groupId>commons-cli</groupId>
- <artifactId>commons-cli</artifactId>
- <version>${commons.cli.version}</version>
- </dependency>
- <dependency>
- <groupId>org.apache.commons</groupId>
- <artifactId>commons-lang3</artifactId>
- <version>${commons.lang3.version}</version>
- </dependency>
- <dependency>
- <groupId>org.apache.cxf</groupId>
- <artifactId>cxf-rt-rs-client</artifactId>
- <version>${cxf.version}</version>
- </dependency>
-
- <!-- logging -->
- <dependency>
- <groupId>org.slf4j</groupId>
- <artifactId>slf4j-log4j12</artifactId>
- </dependency>
- <dependency>
- <groupId>org.slf4j</groupId>
- <artifactId>jcl-over-slf4j</artifactId>
- </dependency>
- <dependency>
- <groupId>org.slf4j</groupId>
- <artifactId>jul-to-slf4j</artifactId>
- </dependency>
-
- <dependency>
- <groupId>${project.groupId}</groupId>
- <artifactId>tika-core</artifactId>
- <version>${project.version}</version>
- <type>test-jar</type>
- <scope>test</scope>
- </dependency>
-
- <dependency>
- <groupId>junit</groupId>
- <artifactId>junit</artifactId>
- <scope>test</scope>
- </dependency>
- </dependencies>
-
- <build>
- <plugins>
- <plugin>
- <artifactId>maven-surefire-plugin</artifactId>
- <configuration>
- <redirectTestOutputToFile>true</redirectTestOutputToFile>
- <argLine>-da -XX:+HeapDumpOnOutOfMemoryError -Xmx512m</argLine>
- <systemPropertyVariables>
- <java.util.logging.config.file>
- ${basedir}/src/main/resources/commons-logging.properties
- </java.util.logging.config.file>
- </systemPropertyVariables>
- </configuration>
- </plugin>
- <plugin>
- <artifactId>maven-shade-plugin</artifactId>
- <version>${maven.shade.version}</version>
- <executions>
- <execution>
- <phase>package</phase>
- <goals>
- <goal>shade</goal>
- </goals>
- <configuration>
- <createDependencyReducedPom>
- false
- </createDependencyReducedPom>
- <artifactSet>
- <excludes>
- <exclude>org.apache.tika:tika-parsers:jar:</exclude>
- </excludes>
- </artifactSet>
- <filters>
- <filter>
- <artifact>*:*</artifact>
- <excludes>
- <exclude>META-INF/*.SF</exclude>
- <exclude>META-INF/*.DSA</exclude>
- <exclude>META-INF/*.RSA</exclude>
- <exclude>META-INF/*.txt</exclude>
- <exclude>META-INF/ASL2.0</exclude>
- <exclude>META-INF/DEPENDENCIES</exclude>
- <exclude>META-INF/LICENSE</exclude>
- <exclude>META-INF/NOTICE</exclude>
- <exclude>META-INF/README</exclude>
- <exclude>LICENSE.txt</exclude>
- <exclude>NOTICE.txt</exclude>
- <exclude>CHANGES</exclude>
- <exclude>README</exclude>
- <exclude>builddef.lst</exclude>
- <!-- clutter not needed in jar -->
- <exclude>resources/grib1/nasa/README*.pdf</exclude>
- <exclude>resources/grib1/**/readme*.txt</exclude>
- <exclude>resources/grib2/**/readme*.txt</exclude>
- <!-- TIKA-763: Workaround to avoid including LGPL classes -->
- <exclude>ucar/nc2/iosp/fysat/Fysat*.class</exclude>
- <exclude>ucar/nc2/dataset/transform/VOceanSG1*class</exclude>
- <exclude>ucar/unidata/geoloc/vertical/OceanSG*.class</exclude>
- </excludes>
- </filter>
- </filters>
- <transformers>
- <transformer
- implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
- <mainClass>org.apache.tika.server.TikaServerCli</mainClass>
- </transformer>
- <transformer
- implementation="org.apache.maven.plugins.shade.resource.ServicesResourceTransformer"/>
- <transformer
- implementation="org.apache.maven.plugins.shade.resource.IncludeResourceTransformer">
- <resource>META-INF/LICENSE</resource>
- <file>target/classes/META-INF/LICENSE</file>
- </transformer>
- <transformer
- implementation="org.apache.maven.plugins.shade.resource.IncludeResourceTransformer">
- <resource>META-INF/NOTICE</resource>
- <file>target/classes/META-INF/NOTICE</file>
- </transformer>
- <transformer
- implementation="org.apache.maven.plugins.shade.resource.IncludeResourceTransformer">
- <resource>META-INF/DEPENDENCIES</resource>
- <file>target/classes/META-INF/DEPENDENCIES</file>
- </transformer>
- <transformer
- implementation="org.apache.maven.plugins.shade.resource.AppendingTransformer">
- <resource>META-INF/spring.handlers</resource>
- </transformer>
- <transformer
- implementation="org.apache.maven.plugins.shade.resource.AppendingTransformer">
- <resource>META-INF/spring.schemas</resource>
- </transformer>
- <transformer
- implementation="org.apache.maven.plugins.shade.resource.AppendingTransformer">
- <resource>META-INF/cxf/cxf.extension</resource>
- </transformer>
- <transformer
- implementation="org.apache.maven.plugins.shade.resource.XmlAppendingTransformer">
- <resource>META-INF/extensions.xml</resource>
- </transformer>
- <transformer
- implementation="org.apache.maven.plugins.shade.resource.XmlAppendingTransformer">
- <resource>META-INF/cxf/extensions.xml</resource>
- </transformer>
- <transformer
- implementation="org.apache.maven.plugins.shade.resource.AppendingTransformer">
- <resource>META-INF/cxf/bus-extensions.txt</resource>
- </transformer>
- <transformer
- implementation="org.apache.maven.plugins.shade.resource.XmlAppendingTransformer">
- <resource>META-INF/cxf/bus-extensions.xml</resource>
- </transformer>
- <transformer
- implementation="org.apache.maven.plugins.shade.resource.XmlAppendingTransformer">
- <resource>META-INF/wsdl.plugin.xml</resource>
- </transformer>
- <transformer
- implementation="org.apache.maven.plugins.shade.resource.XmlAppendingTransformer">
- <resource>META-INF/tools.service.validator.xml</resource>
- </transformer>
- <transformer
- implementation="org.apache.maven.plugins.shade.resource.XmlAppendingTransformer">
- <resource>META-INF/cxf/java2wsbeans.xml</resource>
- </transformer>
- </transformers>
- </configuration>
- </execution>
- </executions>
- </plugin>
- <plugin>
- <groupId>org.apache.maven.plugins</groupId>
- <artifactId>maven-jar-plugin</artifactId>
- <configuration>
- <archive>
- <manifestEntries>
- <Automatic-Module-Name>org.apache.tika.server</Automatic-Module-Name>
- </manifestEntries>
- </archive>
- </configuration>
- <executions>
- <execution>
- <goals>
- <goal>test-jar</goal>
- </goals>
- </execution>
- </executions>
- </plugin>
- <plugin>
- <groupId>org.apache.rat</groupId>
- <artifactId>apache-rat-plugin</artifactId>
- <version>${rat.version}</version>
- <configuration>
- <excludes>
- <exclude>src/main/resources/tikaserver-version.properties</exclude>
- <exclude>src/test/resources/*</exclude>
- <exclude>README.md</exclude>
- </excludes>
- </configuration>
- </plugin>
- <plugin>
- <artifactId>maven-assembly-plugin</artifactId>
- <configuration>
- <descriptors>
- <descriptor>assembly.xml</descriptor>
- </descriptors>
- </configuration>
- <executions>
- <execution>
- <id>make-assembly</id> <!-- this is used for inheritance merges -->
- <phase>package</phase> <!-- bind to the packaging phase -->
- <goals>
- <goal>single</goal>
- </goals>
- </execution>
- </executions>
- </plugin>
- </plugins>
- </build>
- <profiles>
- <profile>
- <id>server</id>
- <build>
- <defaultGoal>test</defaultGoal>
- <plugins>
- <plugin>
- <groupId>org.codehaus.mojo</groupId>
- <artifactId>exec-maven-plugin</artifactId>
- <version>1.6.0</version>
- <executions>
- <execution>
- <phase>test</phase>
- <goals>
- <goal>java</goal>
- </goals>
- <configuration>
- <mainClass>org.apache.tika.server.TikaServerCli</mainClass>
- </configuration>
- </execution>
- </executions>
- </plugin>
- <plugin>
- <groupId>com.qmino</groupId>
- <artifactId>miredot-plugin</artifactId>
- <version>2.2</version>
- <executions>
- <execution>
- <goals>
- <goal>restdoc</goal>
- </goals>
- </execution>
- </executions>
- <configuration>
- <organizationId>c579ca2e-c194-4a14-a7f0-f39ef4ec3c2d</organizationId>
- <restModel>
- <restFramework>
- <name>jax-rs</name>
- </restFramework>
- </restModel>
- <output>
- <html/>
- </output>
- <!-- Free Miredot license key, valid until Jan 31st(?), 2025
- https://issues.apache.org/jira/browse/TIKA-2253
- https://issues.apache.org/jira/browse/TIKA-3038-->
- <licence>
- cHJvamVjdHxvcmcuYXBhY2hlLnRpa2EudGlrYS1zZXJ2ZXJ8MjAyNS0wMy0wMXxmYWxzZXwtMSNNQ3dDRkdtemsraHJDdjNodXlhN0d1KzlQM1Y0bUNxMEFoUnRHc25DQ1VyMjhLQ2swU0R5RGp1WkVpcXVlZz09
- </licence>
- <!-- insert other configuration here (optional) -->
- </configuration>
- </plugin>
- </plugins>
- </build>
- </profile>
- </profiles>
<organization>
<name>The Apache Software Foundation</name>
diff --git a/tika-server/src/test/resources/mock/fake_oom.xml b/tika-server/src/test/resources/mock/fake_oom.xml
deleted file mode 100644
index 42aa9a7..0000000
--- a/tika-server/src/test/resources/mock/fake_oom.xml
+++ /dev/null
@@ -1,24 +0,0 @@
-<?xml version="1.0" encoding="UTF-8" ?>
-<!--
- Licensed to the Apache Software Foundation (ASF) under one
- or more contributor license agreements. See the NOTICE file
- distributed with this work for additional information
- regarding copyright ownership. The ASF licenses this file
- to you under the Apache License, Version 2.0 (the
- "License"); you may not use this file except in compliance
- with the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing,
- software distributed under the License is distributed on an
- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- KIND, either express or implied. See the License for the
- specific language governing permissions and limitations
- under the License.
--->
-
-<mock>
- <metadata action="add" name="author">Nikolai Lobachevsky</metadata>
- <throw class="java.lang.OutOfMemoryError">oom message</throw>
-</mock>
\ No newline at end of file
diff --git a/tika-server/src/test/resources/mock/heavy_hang_100.xml b/tika-server/src/test/resources/mock/heavy_hang_100.xml
deleted file mode 100644
index b1413bc..0000000
--- a/tika-server/src/test/resources/mock/heavy_hang_100.xml
+++ /dev/null
@@ -1,25 +0,0 @@
-<?xml version="1.0" encoding="UTF-8" ?>
-<!--
- Licensed to the Apache Software Foundation (ASF) under one
- or more contributor license agreements. See the NOTICE file
- distributed with this work for additional information
- regarding copyright ownership. The ASF licenses this file
- to you under the Apache License, Version 2.0 (the
- "License"); you may not use this file except in compliance
- with the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing,
- software distributed under the License is distributed on an
- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- KIND, either express or implied. See the License for the
- specific language governing permissions and limitations
- under the License.
--->
-
-<mock>
- <metadata action="add" name="author">Nikolai Lobachevsky</metadata>
- <write element="p">some content</write>
- <hang millis="100" heavy="true" pulse_millis="50" />
-</mock>
\ No newline at end of file
diff --git a/tika-server/src/test/resources/mock/heavy_hang_30000.xml b/tika-server/src/test/resources/mock/heavy_hang_30000.xml
deleted file mode 100644
index f1f5b67..0000000
--- a/tika-server/src/test/resources/mock/heavy_hang_30000.xml
+++ /dev/null
@@ -1,25 +0,0 @@
-<?xml version="1.0" encoding="UTF-8" ?>
-<!--
- Licensed to the Apache Software Foundation (ASF) under one
- or more contributor license agreements. See the NOTICE file
- distributed with this work for additional information
- regarding copyright ownership. The ASF licenses this file
- to you under the Apache License, Version 2.0 (the
- "License"); you may not use this file except in compliance
- with the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing,
- software distributed under the License is distributed on an
- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- KIND, either express or implied. See the License for the
- specific language governing permissions and limitations
- under the License.
--->
-
-<mock>
- <metadata action="add" name="author">Nikolai Lobachevsky</metadata>
- <write element="p">some content</write>
- <hang millis="30000" heavy="true" pulse_millis="100" />
-</mock>
\ No newline at end of file
diff --git a/tika-server/src/test/resources/mock/null_pointer.xml b/tika-server/src/test/resources/mock/null_pointer.xml
deleted file mode 100644
index 80043c0..0000000
--- a/tika-server/src/test/resources/mock/null_pointer.xml
+++ /dev/null
@@ -1,25 +0,0 @@
-<?xml version="1.0" encoding="UTF-8" ?>
-<!--
- Licensed to the Apache Software Foundation (ASF) under one
- or more contributor license agreements. See the NOTICE file
- distributed with this work for additional information
- regarding copyright ownership. The ASF licenses this file
- to you under the Apache License, Version 2.0 (the
- "License"); you may not use this file except in compliance
- with the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing,
- software distributed under the License is distributed on an
- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- KIND, either express or implied. See the License for the
- specific language governing permissions and limitations
- under the License.
--->
-
-<mock>
- <metadata action="add" name="author">Nikolai Lobachevsky</metadata>
- <write element="p">some content</write>
- <throw class="java.lang.NullPointerException">null pointer message</throw>
-</mock>
diff --git a/tika-server/src/test/resources/mock/real_oom.xml b/tika-server/src/test/resources/mock/real_oom.xml
deleted file mode 100644
index 168751a..0000000
--- a/tika-server/src/test/resources/mock/real_oom.xml
+++ /dev/null
@@ -1,24 +0,0 @@
-<?xml version="1.0" encoding="UTF-8" ?>
-<!--
- Licensed to the Apache Software Foundation (ASF) under one
- or more contributor license agreements. See the NOTICE file
- distributed with this work for additional information
- regarding copyright ownership. The ASF licenses this file
- to you under the Apache License, Version 2.0 (the
- "License"); you may not use this file except in compliance
- with the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing,
- software distributed under the License is distributed on an
- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- KIND, either express or implied. See the License for the
- specific language governing permissions and limitations
- under the License.
--->
-
-<mock>
- <metadata action="add" name="author">Nikolai Lobachevsky</metadata>
- <oom/>
-</mock>
\ No newline at end of file
diff --git a/tika-server/src/test/resources/mock/system_exit.xml b/tika-server/src/test/resources/mock/system_exit.xml
deleted file mode 100644
index 75d1d3b..0000000
--- a/tika-server/src/test/resources/mock/system_exit.xml
+++ /dev/null
@@ -1,25 +0,0 @@
-<?xml version="1.0" encoding="UTF-8" ?>
-<!--
- Licensed to the Apache Software Foundation (ASF) under one
- or more contributor license agreements. See the NOTICE file
- distributed with this work for additional information
- regarding copyright ownership. The ASF licenses this file
- to you under the Apache License, Version 2.0 (the
- "License"); you may not use this file except in compliance
- with the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing,
- software distributed under the License is distributed on an
- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- KIND, either express or implied. See the License for the
- specific language governing permissions and limitations
- under the License.
--->
-
-<mock>
- <metadata action="add" name="author">Nikolai Lobachevsky</metadata>
- <write element="p">some content</write>
- <system_exit />
-</mock>
\ No newline at end of file
diff --git a/tika-server/src/test/resources/mock/testStaticStdOutErr.xml b/tika-server/src/test/resources/mock/testStaticStdOutErr.xml
deleted file mode 100644
index f08229e..0000000
--- a/tika-server/src/test/resources/mock/testStaticStdOutErr.xml
+++ /dev/null
@@ -1,75 +0,0 @@
-<?xml version="1.0" encoding="UTF-8" ?>
-
-<!--
- Licensed to the Apache Software Foundation (ASF) under one
- or more contributor license agreements. See the NOTICE file
- distributed with this work for additional information
- regarding copyright ownership. The ASF licenses this file
- to you under the Apache License, Version 2.0 (the
- "License"); you may not use this file except in compliance
- with the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing,
- software distributed under the License is distributed on an
- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- KIND, either express or implied. See the License for the
- specific language governing permissions and limitations
- under the License.
--->
-
-<mock>
-
-
- <!-- action can be "add" or "set" -->
- <metadata action="add" name="dc:creator">Nikolai Lobachevsky</metadata>
- <!-- element is the name of the sax event to write, p=paragraph
- if the element is not specified, the default is <p> -->
- <hang millis="100" heavy="false" />
- <write element="p">hello world! </write>
- <!-- write something to System.out -->
- <print_out>writing to System.out</print_out>
- <hang millis="10" heavy="false" />
- <print_out static="true" random="200000"/>
- <hang millis="10" heavy="false" />
- <print_out static="true" random="200000"/>
- <hang millis="10" heavy="false" />
- <print_out static="true" random="200000"/>
- <hang millis="10" heavy="false" />
- <print_err static="true" random="200000"/>
- <hang millis="10" heavy="false" />
- <print_err static="true" random="200000"/>
- <hang millis="10" heavy="false" />
- <print_out>writing to System.out</print_out>
- <hang millis="80" heavy="false" />
- <print_out>writing to System.out</print_out>
- <hang millis="70" heavy="false" />
- <print_out>writing to System.out</print_out>
- <hang millis="60" heavy="false" />
- <print_out>writing to System.out</print_out>
- <hang millis="20" heavy="false" />
- <print_out>writing to System.out</print_out>
- <hang millis="10" heavy="false" />
- <print_out>writing to System.out</print_out>
- <hang millis="30" heavy="false" />
- <print_out>writing to System.out</print_out>
- <hang millis="10" heavy="false" />
- <print_out>writing to System.out</print_out>
- <hang millis="10" heavy="false" />
- <print_out>writing to System.out</print_out>
- <hang millis="20" heavy="false" />
- <print_out>writing to System.out</print_out>
- <hang millis="20" heavy="false" />
- <print_out>writing to System.out</print_out>
- <hang millis="30" heavy="false" />
- <print_out>writing to System.out</print_out>
- <hang millis="10" heavy="false" />
- <print_out>writing to System.out</print_out>
- <hang millis="50" heavy="false" />
- <!-- write something to System.err -->
- <print_err>writing to System.err</print_err>
-
- <write element="p">Even after the calls to Sys.out/err...hello world! the quick brown fox jumped over the lazy dog</write>
-
-</mock>
\ No newline at end of file
diff --git a/tika-server/src/test/resources/mock/testStdOutErr.xml b/tika-server/src/test/resources/mock/testStdOutErr.xml
deleted file mode 100644
index ce0682f..0000000
--- a/tika-server/src/test/resources/mock/testStdOutErr.xml
+++ /dev/null
@@ -1,75 +0,0 @@
-<?xml version="1.0" encoding="UTF-8" ?>
-
-<!--
- Licensed to the Apache Software Foundation (ASF) under one
- or more contributor license agreements. See the NOTICE file
- distributed with this work for additional information
- regarding copyright ownership. The ASF licenses this file
- to you under the Apache License, Version 2.0 (the
- "License"); you may not use this file except in compliance
- with the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing,
- software distributed under the License is distributed on an
- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- KIND, either express or implied. See the License for the
- specific language governing permissions and limitations
- under the License.
--->
-
-<mock>
-
-
- <!-- action can be "add" or "set" -->
- <metadata action="add" name="dc:creator">Nikolai Lobachevsky</metadata>
- <!-- element is the name of the sax event to write, p=paragraph
- if the element is not specified, the default is <p> -->
- <hang millis="100" heavy="false" />
- <write element="p">hello world! </write>
- <!-- write something to System.out -->
- <print_out>writing to System.out</print_out>
- <hang millis="10" heavy="false" />
- <print_out>writing to System.out</print_out>
- <hang millis="100" heavy="false" />
- <print_out>writing to System.out</print_out>
- <hang millis="100" heavy="false" />
- <print_out>writing to System.out</print_out>
- <hang millis="100" heavy="false" />
- <print_out>writing to System.out</print_out>
- <hang millis="10" heavy="false" />
- <print_out>writing to System.out</print_out>
- <hang millis="10" heavy="false" />
- <print_out>writing to System.out</print_out>
- <hang millis="80" heavy="false" />
- <print_out>writing to System.out</print_out>
- <hang millis="70" heavy="false" />
- <print_out>writing to System.out</print_out>
- <hang millis="60" heavy="false" />
- <print_out>writing to System.out</print_out>
- <hang millis="20" heavy="false" />
- <print_out>writing to System.out</print_out>
- <hang millis="10" heavy="false" />
- <print_out>writing to System.out</print_out>
- <hang millis="10" heavy="false" />
- <print_out>writing to System.out</print_out>
- <hang millis="1" heavy="false" />
- <print_out>writing to System.out</print_out>
- <hang millis="10" heavy="false" />
- <print_out>writing to System.out</print_out>
- <hang millis="20" heavy="false" />
- <print_out>writing to System.out</print_out>
- <hang millis="20" heavy="false" />
- <print_out>writing to System.out</print_out>
- <hang millis="30" heavy="false" />
- <print_out>writing to System.out</print_out>
- <hang millis="10" heavy="false" />
- <print_out>writing to System.out</print_out>
- <hang millis="50" heavy="false" />
- <!-- write something to System.err -->
- <print_err>writing to System.err</print_err>
-
- <write element="p">Even after the calls to Sys.out/err...hello world! the quick brown fox jumped over the lazy dog</write>
-
-</mock>
\ No newline at end of file
diff --git a/tika-server/src/test/resources/mock/thread_interrupt.xml b/tika-server/src/test/resources/mock/thread_interrupt.xml
deleted file mode 100644
index 3e54512..0000000
--- a/tika-server/src/test/resources/mock/thread_interrupt.xml
+++ /dev/null
@@ -1,25 +0,0 @@
-<?xml version="1.0" encoding="UTF-8" ?>
-<!--
- Licensed to the Apache Software Foundation (ASF) under one
- or more contributor license agreements. See the NOTICE file
- distributed with this work for additional information
- regarding copyright ownership. The ASF licenses this file
- to you under the Apache License, Version 2.0 (the
- "License"); you may not use this file except in compliance
- with the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing,
- software distributed under the License is distributed on an
- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- KIND, either express or implied. See the License for the
- specific language governing permissions and limitations
- under the License.
--->
-
-<mock>
- <metadata action="add" name="author">Nikolai Lobachevsky</metadata>
- <write element="p">some content</write>
- <thread_interrupt />
-</mock>
\ No newline at end of file
diff --git a/tika-server/assembly.xml b/tika-server/tika-server-classic/assembly.xml
similarity index 94%
rename from tika-server/assembly.xml
rename to tika-server/tika-server-classic/assembly.xml
index 216ee71..b346df4 100644
--- a/tika-server/assembly.xml
+++ b/tika-server/tika-server-classic/assembly.xml
@@ -38,7 +38,7 @@
</fileSets>
<files>
<file>
- <source>${project.build.directory}/tika-server-${project.version}.jar</source>
+ <source>${project.build.directory}/tika-server-classic-${project.version}.jar</source>
<outputDirectory/>
<destName>tika-server.jar</destName>
</file>
diff --git a/tika-server/bin/init.d/tika b/tika-server/tika-server-classic/bin/init.d/tika
similarity index 100%
rename from tika-server/bin/init.d/tika
rename to tika-server/tika-server-classic/bin/init.d/tika
diff --git a/tika-server/bin/install_tika_service.sh b/tika-server/tika-server-classic/bin/install_tika_service.sh
similarity index 100%
rename from tika-server/bin/install_tika_service.sh
rename to tika-server/tika-server-classic/bin/install_tika_service.sh
diff --git a/tika-server/bin/tika b/tika-server/tika-server-classic/bin/tika
similarity index 100%
rename from tika-server/bin/tika
rename to tika-server/tika-server-classic/bin/tika
diff --git a/tika-server/bin/tika.in.sh b/tika-server/tika-server-classic/bin/tika.in.sh
similarity index 100%
rename from tika-server/bin/tika.in.sh
rename to tika-server/tika-server-classic/bin/tika.in.sh
diff --git a/tika-server/tika-server-classic/pom.xml b/tika-server/tika-server-classic/pom.xml
new file mode 100644
index 0000000..68cae1e
--- /dev/null
+++ b/tika-server/tika-server-classic/pom.xml
@@ -0,0 +1,245 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+ <parent>
+ <artifactId>tika-server</artifactId>
+ <groupId>org.apache.tika</groupId>
+ <version>2.0.0-SNAPSHOT</version>
+ </parent>
+ <modelVersion>4.0.0</modelVersion>
+
+ <artifactId>tika-server-classic</artifactId>
+
+
+ <dependencies>
+ <dependency>
+ <groupId>${project.groupId}</groupId>
+ <artifactId>tika-server-core</artifactId>
+ <version>${project.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>${project.groupId}</groupId>
+ <artifactId>tika-parsers-classic-package</artifactId>
+ <version>${project.version}</version>
+ <!-- these artifacts are already brought in by tika-server-core; exclude them here to avoid duplicates -->
+ <exclusions>
+ <exclusion>
+ <groupId>org.apache.tika</groupId>
+ <artifactId>tika-parser-digest-commons</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.apache.tika</groupId>
+ <artifactId>tika-parser-zip-commons</artifactId>
+ </exclusion>
+ </exclusions>
+ </dependency>
+ <dependency>
+ <groupId>${project.groupId}</groupId>
+ <artifactId>tika-xmp</artifactId>
+ <version>${project.version}</version>
+ </dependency>
+
+ <!-- test jars -->
+ <dependency>
+ <groupId>${project.groupId}</groupId>
+ <artifactId>tika-server-core</artifactId>
+ <version>${project.version}</version>
+ <type>test-jar</type>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>${project.groupId}</groupId>
+ <artifactId>tika-core</artifactId>
+ <version>${project.version}</version>
+ <type>test-jar</type>
+ <scope>test</scope>
+ </dependency>
+ </dependencies>
+ <build>
+ <plugins>
+ <plugin>
+ <artifactId>maven-surefire-plugin</artifactId>
+ <configuration>
+ <redirectTestOutputToFile>true</redirectTestOutputToFile>
+ <argLine>-da -XX:+HeapDumpOnOutOfMemoryError -Xmx512m</argLine>
+ <systemPropertyVariables>
+ <java.util.logging.config.file>
+ ${basedir}/src/main/resources/commons-logging.properties
+ </java.util.logging.config.file>
+ </systemPropertyVariables>
+ </configuration>
+ </plugin>
+ <plugin>
+ <artifactId>maven-shade-plugin</artifactId>
+ <version>${maven.shade.version}</version>
+ <executions>
+ <execution>
+ <phase>package</phase>
+ <goals>
+ <goal>shade</goal>
+ </goals>
+ <configuration>
+ <createDependencyReducedPom>
+ false
+ </createDependencyReducedPom>
+ <artifactSet>
+ <excludes>
+ <exclude>org.apache.tika:tika-parsers-classic-package:jar:</exclude>
+ <exclude>org.apache.tika:tika-server-core:jar:</exclude>
+ </excludes>
+ </artifactSet>
+ <filters>
+ <filter>
+ <artifact>*:*</artifact>
+ <excludes>
+ <exclude>META-INF/*.SF</exclude>
+ <exclude>META-INF/*.DSA</exclude>
+ <exclude>META-INF/*.RSA</exclude>
+ <exclude>META-INF/*.txt</exclude>
+ <exclude>META-INF/ASL2.0</exclude>
+ <exclude>META-INF/DEPENDENCIES</exclude>
+ <exclude>META-INF/LICENSE</exclude>
+ <exclude>META-INF/NOTICE</exclude>
+ <exclude>META-INF/README</exclude>
+ <exclude>LICENSE.txt</exclude>
+ <exclude>NOTICE.txt</exclude>
+ <exclude>CHANGES</exclude>
+ <exclude>README</exclude>
+ <exclude>builddef.lst</exclude>
+ </excludes>
+ </filter>
+ </filters>
+ <transformers>
+ <transformer
+ implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
+ <mainClass>org.apache.tika.server.core.TikaServerCli</mainClass>
+ </transformer>
+ <transformer
+ implementation="org.apache.maven.plugins.shade.resource.ServicesResourceTransformer"/>
+ <transformer
+ implementation="org.apache.maven.plugins.shade.resource.IncludeResourceTransformer">
+ <resource>META-INF/LICENSE</resource>
+ <file>target/classes/META-INF/LICENSE</file>
+ </transformer>
+ <transformer
+ implementation="org.apache.maven.plugins.shade.resource.IncludeResourceTransformer">
+ <resource>META-INF/NOTICE</resource>
+ <file>target/classes/META-INF/NOTICE</file>
+ </transformer>
+ <transformer
+ implementation="org.apache.maven.plugins.shade.resource.IncludeResourceTransformer">
+ <resource>META-INF/DEPENDENCIES</resource>
+ <file>target/classes/META-INF/DEPENDENCIES</file>
+ </transformer>
+ <transformer
+ implementation="org.apache.maven.plugins.shade.resource.AppendingTransformer">
+ <resource>META-INF/spring.handlers</resource>
+ </transformer>
+ <transformer
+ implementation="org.apache.maven.plugins.shade.resource.AppendingTransformer">
+ <resource>META-INF/spring.schemas</resource>
+ </transformer>
+ <transformer
+ implementation="org.apache.maven.plugins.shade.resource.AppendingTransformer">
+ <resource>META-INF/cxf/cxf.extension</resource>
+ </transformer>
+ <transformer
+ implementation="org.apache.maven.plugins.shade.resource.XmlAppendingTransformer">
+ <resource>META-INF/extensions.xml</resource>
+ </transformer>
+ <transformer
+ implementation="org.apache.maven.plugins.shade.resource.XmlAppendingTransformer">
+ <resource>META-INF/cxf/extensions.xml</resource>
+ </transformer>
+ <transformer
+ implementation="org.apache.maven.plugins.shade.resource.AppendingTransformer">
+ <resource>META-INF/cxf/bus-extensions.txt</resource>
+ </transformer>
+ <transformer
+ implementation="org.apache.maven.plugins.shade.resource.XmlAppendingTransformer">
+ <resource>META-INF/cxf/bus-extensions.xml</resource>
+ </transformer>
+ <transformer
+ implementation="org.apache.maven.plugins.shade.resource.XmlAppendingTransformer">
+ <resource>META-INF/wsdl.plugin.xml</resource>
+ </transformer>
+ <transformer
+ implementation="org.apache.maven.plugins.shade.resource.XmlAppendingTransformer">
+ <resource>META-INF/tools.service.validator.xml</resource>
+ </transformer>
+ <transformer
+ implementation="org.apache.maven.plugins.shade.resource.XmlAppendingTransformer">
+ <resource>META-INF/cxf/java2wsbeans.xml</resource>
+ </transformer>
+ </transformers>
+ </configuration>
+ </execution>
+ </executions>
+ </plugin>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-jar-plugin</artifactId>
+ <configuration>
+ <archive>
+ <manifestEntries>
+ <Automatic-Module-Name>org.apache.tika.server.classic</Automatic-Module-Name>
+ </manifestEntries>
+ </archive>
+ </configuration>
+ <executions>
+ <execution>
+ <goals>
+ <goal>test-jar</goal>
+ </goals>
+ </execution>
+ </executions>
+ </plugin>
+ <plugin>
+ <groupId>org.apache.rat</groupId>
+ <artifactId>apache-rat-plugin</artifactId>
+ <version>${rat.version}</version>
+ <configuration>
+ <excludes>
+ <exclude>src/main/resources/tikaserver-version.properties</exclude>
+ <exclude>src/test/resources/test-documents/*</exclude>
+ <exclude>src/TODO</exclude>
+ </excludes>
+ </configuration>
+ </plugin>
+ <plugin>
+ <artifactId>maven-assembly-plugin</artifactId>
+ <configuration>
+ <descriptors>
+ <descriptor>assembly.xml</descriptor>
+ </descriptors>
+ </configuration>
+ <executions>
+ <execution>
+ <id>make-assembly</id> <!-- this is used for inheritance merges -->
+ <phase>package</phase> <!-- bind to the packaging phase -->
+ <goals>
+ <goal>single</goal>
+ </goals>
+ </execution>
+ </executions>
+ </plugin>
+ </plugins>
+ </build>
+</project>
\ No newline at end of file
diff --git a/tika-server/tika-server-classic/src/main/java/org/apache/tika/server/classic/config/PDFServerConfig.java b/tika-server/tika-server-classic/src/main/java/org/apache/tika/server/classic/config/PDFServerConfig.java
new file mode 100644
index 0000000..9dcf61d
--- /dev/null
+++ b/tika-server/tika-server-classic/src/main/java/org/apache/tika/server/classic/config/PDFServerConfig.java
@@ -0,0 +1,52 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.server.classic.config;
+
+import org.apache.commons.lang3.StringUtils;
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.extractor.DocumentSelector;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.parser.pdf.PDFParserConfig;
+import org.apache.tika.server.core.ParseContextConfig;
+
+import javax.ws.rs.core.MultivaluedMap;
+
+import static org.apache.tika.server.core.resource.TikaResource.processHeaderConfig;
+
+/**
+ * {@link ParseContextConfig} that turns request headers whose names start with
+ * {@link #X_TIKA_PDF_HEADER_PREFIX} into a per-request {@link PDFParserConfig}.
+ */
+public class PDFServerConfig implements ParseContextConfig {
+
+    public static final String X_TIKA_PDF_HEADER_PREFIX = "X-Tika-PDF";
+
+    /**
+     * Scans the request headers and, when at least one header name starts with
+     * {@link #X_TIKA_PDF_HEADER_PREFIX}, stores a freshly built
+     * {@link PDFParserConfig} in the parse context. When no such header is
+     * present the parse context is left untouched.
+     *
+     * @param httpHeaders request headers to scan
+     * @param metadata metadata for the current request; not used by this method
+     * @param parseContext context that receives the config when one is created
+     */
+    @Override
+    public void configure(MultivaluedMap<String, String> httpHeaders,
+                          Metadata metadata, ParseContext parseContext) {
+        // The config is only created once a matching header is seen.
+        // If a header is submitted, any params set in --tika-config tika-config.xml
+        // upon server startup will be ignored.
+        PDFParserConfig headerConfig = null;
+        for (String headerName : httpHeaders.keySet()) {
+            if (!StringUtils.startsWith(headerName, X_TIKA_PDF_HEADER_PREFIX)) {
+                continue;
+            }
+            if (headerConfig == null) {
+                headerConfig = new PDFParserConfig();
+            }
+            processHeaderConfig(httpHeaders, headerConfig, headerName, X_TIKA_PDF_HEADER_PREFIX);
+        }
+        if (headerConfig == null) {
+            return;
+        }
+        parseContext.set(PDFParserConfig.class, headerConfig);
+    }
+}
diff --git a/tika-server/tika-server-classic/src/main/java/org/apache/tika/server/classic/config/TesseractServerConfig.java b/tika-server/tika-server-classic/src/main/java/org/apache/tika/server/classic/config/TesseractServerConfig.java
new file mode 100644
index 0000000..3db0859
--- /dev/null
+++ b/tika-server/tika-server-classic/src/main/java/org/apache/tika/server/classic/config/TesseractServerConfig.java
@@ -0,0 +1,52 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.server.classic.config;
+
+import org.apache.commons.lang3.StringUtils;
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.extractor.DocumentSelector;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.parser.ocr.TesseractOCRConfig;
+import org.apache.tika.server.core.ParseContextConfig;
+
+import javax.ws.rs.core.MultivaluedMap;
+
+import static org.apache.tika.server.core.resource.TikaResource.processHeaderConfig;
+
+/**
+ * {@link ParseContextConfig} that turns request headers whose names start with
+ * {@link #X_TIKA_OCR_HEADER_PREFIX} into a per-request {@link TesseractOCRConfig}.
+ */
+public class TesseractServerConfig implements ParseContextConfig {
+
+    public static final String X_TIKA_OCR_HEADER_PREFIX = "X-Tika-OCR";
+
+    /**
+     * Scans the request headers and, when at least one header name starts with
+     * {@link #X_TIKA_OCR_HEADER_PREFIX}, stores a freshly built
+     * {@link TesseractOCRConfig} in the parse context. When no such header is
+     * present the parse context is left untouched.
+     *
+     * @param httpHeaders request headers to scan
+     * @param metadata metadata for the current request; not used by this method
+     * @param parseContext context that receives the config when one is created
+     */
+    @Override
+    public void configure(MultivaluedMap<String, String> httpHeaders,
+                          Metadata metadata, ParseContext parseContext) {
+        //lazily initialize configs
+        //if a header is submitted, any params set in --tika-config tika-config.xml
+        //upon server startup will be ignored.
+        TesseractOCRConfig ocrConfig = null;
+        for (String key : httpHeaders.keySet()) {
+            if (StringUtils.startsWith(key, X_TIKA_OCR_HEADER_PREFIX)) {
+                ocrConfig = (ocrConfig == null) ? new TesseractOCRConfig() : ocrConfig;
+                processHeaderConfig(httpHeaders, ocrConfig, key, X_TIKA_OCR_HEADER_PREFIX);
+            }
+        }
+        if (ocrConfig != null) {
+            parseContext.set(TesseractOCRConfig.class, ocrConfig);
+        }
+    }
+}
diff --git a/tika-server/tika-server-classic/src/main/java/org/apache/tika/server/classic/resource/XMPMetadataResource.java b/tika-server/tika-server-classic/src/main/java/org/apache/tika/server/classic/resource/XMPMetadataResource.java
new file mode 100644
index 0000000..bcf4869
--- /dev/null
+++ b/tika-server/tika-server-classic/src/main/java/org/apache/tika/server/classic/resource/XMPMetadataResource.java
@@ -0,0 +1,49 @@
+package org.apache.tika.server.classic.resource;
+
+import org.apache.cxf.jaxrs.ext.multipart.Attachment;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.server.core.resource.MetadataResource;
+import org.apache.tika.server.core.resource.TikaResource;
+import org.apache.tika.server.core.resource.TikaServerResource;
+
+import javax.ws.rs.Consumes;
+import javax.ws.rs.POST;
+import javax.ws.rs.PUT;
+import javax.ws.rs.Path;
+import javax.ws.rs.PathParam;
+import javax.ws.rs.Produces;
+import javax.ws.rs.core.Context;
+import javax.ws.rs.core.HttpHeaders;
+import javax.ws.rs.core.Response;
+import javax.ws.rs.core.UriInfo;
+import java.io.InputStream;
+
+/**
+ * Variant of {@link MetadataResource} whose endpoints declare
+ * {@code application/rdf+xml} as the produced media type.
+ */
+public class XMPMetadataResource extends MetadataResource implements TikaServerResource {
+
+    /**
+     * Delegates to {@link MetadataResource#getMetadataField}; overridden so the
+     * endpoint is declared with the {@code application/rdf+xml} media type.
+     */
+    @PUT
+    @Path("{field}")
+    @Produces({"application/rdf+xml"})
+    @Override
+    public Response getMetadataField(InputStream is, @Context HttpHeaders httpHeaders,
+                                     @Context UriInfo info, @PathParam("field") String field) throws Exception {
+        return super.getMetadataField(is, httpHeaders, info, field);
+    }
+
+    /**
+     * Parses the stream carried by a multipart attachment, using the
+     * attachment's own headers, and returns the result as the response entity.
+     */
+    @POST
+    @Consumes("multipart/form-data")
+    @Produces({"application/rdf+xml"})
+    @Path("form")
+    public Response getMetadataFromMultipart(Attachment att, @Context UriInfo info) throws Exception {
+        InputStream attachmentStream = att.getObject(InputStream.class);
+        return Response.ok(
+                parseMetadata(attachmentStream, new Metadata(), att.getHeaders(), info)).build();
+    }
+
+    /**
+     * Parses the request body and returns the result as the response entity.
+     * The body is first passed through {@link TikaResource#getInputStream}
+     * together with a new {@link Metadata} instance and the request headers.
+     */
+    @PUT
+    @Produces({"application/rdf+xml"})
+    public Response getMetadata(InputStream is, @Context HttpHeaders httpHeaders, @Context UriInfo info) throws Exception {
+        Metadata metadata = new Metadata();
+        InputStream requestStream = TikaResource.getInputStream(is, metadata, httpHeaders);
+        return Response.ok(
+                parseMetadata(requestStream, metadata, httpHeaders.getRequestHeaders(), info)).build();
+    }
+}
diff --git a/tika-server/src/main/java/org/apache/tika/server/writer/XMPMessageBodyWriter.java b/tika-server/tika-server-classic/src/main/java/org/apache/tika/server/classic/writer/XMPMessageBodyWriter.java
similarity index 93%
rename from tika-server/src/main/java/org/apache/tika/server/writer/XMPMessageBodyWriter.java
rename to tika-server/tika-server-classic/src/main/java/org/apache/tika/server/classic/writer/XMPMessageBodyWriter.java
index 8e58793..88dcbf3 100644
--- a/tika-server/src/main/java/org/apache/tika/server/writer/XMPMessageBodyWriter.java
+++ b/tika-server/tika-server-classic/src/main/java/org/apache/tika/server/classic/writer/XMPMessageBodyWriter.java
@@ -15,13 +15,12 @@
* limitations under the License.
*/
-package org.apache.tika.server.writer;
+package org.apache.tika.server.classic.writer;
import javax.ws.rs.Produces;
import javax.ws.rs.WebApplicationException;
import javax.ws.rs.core.MediaType;
import javax.ws.rs.core.MultivaluedMap;
-import javax.ws.rs.ext.MessageBodyWriter;
import javax.ws.rs.ext.Provider;
import java.io.IOException;
@@ -33,13 +32,14 @@ import java.lang.reflect.Type;
import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
+import org.apache.tika.server.core.writer.TikaServerWriter;
import org.apache.tika.xmp.XMPMetadata;
import static java.nio.charset.StandardCharsets.UTF_8;
@Provider
@Produces("application/rdf+xml")
-public class XMPMessageBodyWriter implements MessageBodyWriter<Metadata> {
+public class XMPMessageBodyWriter implements TikaServerWriter<Metadata> {
private static MediaType RDF_XML = MediaType.valueOf("application/rdf+xml");
diff --git a/tika-server/tika-server-classic/src/main/resources/META-INF/services/org.apache.tika.server.core.ParseContextConfig b/tika-server/tika-server-classic/src/main/resources/META-INF/services/org.apache.tika.server.core.ParseContextConfig
new file mode 100644
index 0000000..2e04320
--- /dev/null
+++ b/tika-server/tika-server-classic/src/main/resources/META-INF/services/org.apache.tika.server.core.ParseContextConfig
@@ -0,0 +1,16 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+org.apache.tika.server.classic.config.PDFServerConfig
+org.apache.tika.server.classic.config.TesseractServerConfig
\ No newline at end of file
diff --git a/tika-server/tika-server-classic/src/main/resources/META-INF/services/org.apache.tika.server.core.resource.TikaServerResource b/tika-server/tika-server-classic/src/main/resources/META-INF/services/org.apache.tika.server.core.resource.TikaServerResource
new file mode 100644
index 0000000..0940048
--- /dev/null
+++ b/tika-server/tika-server-classic/src/main/resources/META-INF/services/org.apache.tika.server.core.resource.TikaServerResource
@@ -0,0 +1,15 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+org.apache.tika.server.classic.resource.XMPMetadataResource
\ No newline at end of file
diff --git a/tika-server/tika-server-classic/src/main/resources/META-INF/services/org.apache.tika.server.core.writer.TikaServerWriter b/tika-server/tika-server-classic/src/main/resources/META-INF/services/org.apache.tika.server.core.writer.TikaServerWriter
new file mode 100644
index 0000000..77f7ca1
--- /dev/null
+++ b/tika-server/tika-server-classic/src/main/resources/META-INF/services/org.apache.tika.server.core.writer.TikaServerWriter
@@ -0,0 +1,15 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+org.apache.tika.server.classic.writer.XMPMessageBodyWriter
\ No newline at end of file
diff --git a/tika-server/src/test/java/org/apache/tika/server/DetectorResourceTest.java b/tika-server/tika-server-classic/src/test/java/org/apache/tika/server/classic/DetectorResourceTest.java
similarity index 87%
rename from tika-server/src/test/java/org/apache/tika/server/DetectorResourceTest.java
rename to tika-server/tika-server-classic/src/test/java/org/apache/tika/server/classic/DetectorResourceTest.java
index 6c027ef..d531c2e 100644
--- a/tika-server/src/test/java/org/apache/tika/server/DetectorResourceTest.java
+++ b/tika-server/tika-server-classic/src/test/java/org/apache/tika/server/classic/DetectorResourceTest.java
@@ -15,31 +15,33 @@
* limitations under the License.
*/
-package org.apache.tika.server;
+package org.apache.tika.server.classic;
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertNotNull;
+import org.apache.cxf.jaxrs.JAXRSServerFactoryBean;
+import org.apache.cxf.jaxrs.client.WebClient;
+import org.apache.cxf.jaxrs.lifecycle.SingletonResourceProvider;
+import org.apache.tika.server.core.CXFTestBase;
+import org.apache.tika.server.core.ServerStatus;
+import org.apache.tika.server.core.TikaServerParseExceptionMapper;
+import org.apache.tika.server.core.resource.DetectorResource;
+import org.apache.tika.server.core.writer.TarWriter;
+import org.apache.tika.server.core.writer.ZipWriter;
+import org.junit.Test;
import javax.ws.rs.core.Response;
-
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;
-import org.apache.cxf.jaxrs.JAXRSServerFactoryBean;
-import org.apache.cxf.jaxrs.client.WebClient;
-import org.apache.cxf.jaxrs.lifecycle.SingletonResourceProvider;
-import org.apache.tika.server.resource.DetectorResource;
-import org.apache.tika.server.writer.TarWriter;
-import org.apache.tika.server.writer.ZipWriter;
-import org.junit.Test;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
public class DetectorResourceTest extends CXFTestBase {
private static final String DETECT_PATH = "/detect";
private static final String DETECT_STREAM_PATH = DETECT_PATH + "/stream";
- private static final String FOO_CSV = "foo.csv";
- private static final String CDEC_CSV_NO_EXT = "CDEC_WEATHER_2010_03_02";
+ private static final String FOO_CSV = "test-documents/foo.csv";
+ private static final String CDEC_CSV_NO_EXT = "test-documents/CDEC_WEATHER_2010_03_02";
@Override
protected void setUpResources(JAXRSServerFactoryBean sf) {
diff --git a/tika-server/src/test/java/org/apache/tika/server/MetadataResourceTest.java b/tika-server/tika-server-classic/src/test/java/org/apache/tika/server/classic/MetadataResourceTest.java
similarity index 91%
rename from tika-server/src/test/java/org/apache/tika/server/MetadataResourceTest.java
rename to tika-server/tika-server-classic/src/test/java/org/apache/tika/server/classic/MetadataResourceTest.java
index 01acbaf..7319d24 100644
--- a/tika-server/src/test/java/org/apache/tika/server/MetadataResourceTest.java
+++ b/tika-server/tika-server-classic/src/test/java/org/apache/tika/server/classic/MetadataResourceTest.java
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package org.apache.tika.server;
+package org.apache.tika.server.classic;
import static java.nio.charset.StandardCharsets.UTF_8;
import static org.junit.Assert.assertEquals;
@@ -40,12 +40,15 @@ import org.apache.cxf.jaxrs.lifecycle.SingletonResourceProvider;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.metadata.serialization.JsonMetadata;
-import org.apache.tika.server.resource.MetadataResource;
-import org.apache.tika.server.writer.CSVMessageBodyWriter;
-import org.apache.tika.server.writer.JSONMessageBodyWriter;
-import org.apache.tika.server.writer.TextMessageBodyWriter;
-import org.apache.tika.server.writer.XMPMessageBodyWriter;
+import org.apache.tika.server.classic.resource.XMPMetadataResource;
+import org.apache.tika.server.core.CXFTestBase;
+import org.apache.tika.server.core.resource.MetadataResource;
+import org.apache.tika.server.core.writer.CSVMessageBodyWriter;
+import org.apache.tika.server.core.writer.JSONMessageBodyWriter;
+import org.apache.tika.server.core.writer.TextMessageBodyWriter;
+import org.apache.tika.server.classic.writer.XMPMessageBodyWriter;
import org.junit.Assert;
+import org.junit.Ignore;
import org.junit.Test;
public class MetadataResourceTest extends CXFTestBase {
@@ -53,9 +56,11 @@ public class MetadataResourceTest extends CXFTestBase {
@Override
protected void setUpResources(JAXRSServerFactoryBean sf) {
- sf.setResourceClasses(MetadataResource.class);
+ sf.setResourceClasses(MetadataResource.class, XMPMetadataResource.class);
sf.setResourceProvider(MetadataResource.class,
new SingletonResourceProvider(new MetadataResource()));
+ sf.setResourceProvider(XMPMetadataResource.class,
+ new SingletonResourceProvider(new XMPMetadataResource()));
}
@Override
@@ -192,6 +197,7 @@ public class MetadataResourceTest extends CXFTestBase {
}
@Test
+ @Ignore("TODO: add back in xmp handler")
public void testGetField_Author_TEXT_Partial_Found() throws Exception {
InputStream stream = ClassLoader.getSystemResourceAsStream(TikaResourceTest.TEST_DOC);
@@ -220,6 +226,7 @@ public class MetadataResourceTest extends CXFTestBase {
}
@Test
+ @Ignore("TODO: until we can reintegrate xmpwriter")
public void testGetField_Author_XMP_Partial_Found() throws Exception {
InputStream stream = ClassLoader.getSystemResourceAsStream(TikaResourceTest.TEST_DOC);
diff --git a/tika-server/src/test/java/org/apache/tika/server/RecursiveMetadataFilterTest.java b/tika-server/tika-server-classic/src/test/java/org/apache/tika/server/classic/RecursiveMetadataFilterTest.java
similarity index 82%
rename from tika-server/src/test/java/org/apache/tika/server/RecursiveMetadataFilterTest.java
rename to tika-server/tika-server-classic/src/test/java/org/apache/tika/server/classic/RecursiveMetadataFilterTest.java
index 9799f8b..11fcc77 100644
--- a/tika-server/src/test/java/org/apache/tika/server/RecursiveMetadataFilterTest.java
+++ b/tika-server/tika-server-classic/src/test/java/org/apache/tika/server/classic/RecursiveMetadataFilterTest.java
@@ -15,21 +15,17 @@
* limitations under the License.
*/
-package org.apache.tika.server;
+package org.apache.tika.server.classic;
import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream;
import org.apache.cxf.jaxrs.JAXRSServerFactoryBean;
import org.apache.cxf.jaxrs.client.WebClient;
-import org.apache.cxf.jaxrs.ext.multipart.Attachment;
import org.apache.cxf.jaxrs.lifecycle.SingletonResourceProvider;
import org.apache.tika.metadata.Metadata;
-import org.apache.tika.metadata.OfficeOpenXMLExtended;
-import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.metadata.serialization.JsonMetadataList;
-import org.apache.tika.sax.AbstractRecursiveParserWrapperHandler;
-import org.apache.tika.sax.RecursiveParserWrapperHandler;
-import org.apache.tika.server.resource.RecursiveMetadataResource;
-import org.apache.tika.server.writer.MetadataListMessageBodyWriter;
+import org.apache.tika.server.core.CXFTestBase;
+import org.apache.tika.server.core.resource.RecursiveMetadataResource;
+import org.apache.tika.server.core.writer.MetadataListMessageBodyWriter;
import org.junit.Test;
import javax.ws.rs.core.Response;
@@ -42,10 +38,7 @@ import java.util.List;
import java.util.Set;
import static java.nio.charset.StandardCharsets.UTF_8;
-import static org.apache.tika.TikaTest.assertNotContained;
import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertNotNull;
-import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
@@ -53,11 +46,11 @@ public class RecursiveMetadataFilterTest extends CXFTestBase {
private static final String META_PATH = "/rmeta";
- private static final String TEST_RECURSIVE_DOC = "test_recursive_embedded.docx";
+ private static final String TEST_RECURSIVE_DOC = "test-documents/test_recursive_embedded.docx";
@Override
protected InputStream getTikaConfigInputStream() {
- return getClass().getResourceAsStream("TIKA-3137-include.xml");
+ return getClass().getResourceAsStream("/config/TIKA-3137-include.xml");
}
@Override
diff --git a/tika-server/src/test/java/org/apache/tika/server/RecursiveMetadataResourceTest.java b/tika-server/tika-server-classic/src/test/java/org/apache/tika/server/classic/RecursiveMetadataResourceTest.java
similarity index 97%
rename from tika-server/src/test/java/org/apache/tika/server/RecursiveMetadataResourceTest.java
rename to tika-server/tika-server-classic/src/test/java/org/apache/tika/server/classic/RecursiveMetadataResourceTest.java
index 38dcb05..3a32f13 100644
--- a/tika-server/src/test/java/org/apache/tika/server/RecursiveMetadataResourceTest.java
+++ b/tika-server/tika-server-classic/src/test/java/org/apache/tika/server/classic/RecursiveMetadataResourceTest.java
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package org.apache.tika.server;
+package org.apache.tika.server.classic;
import static java.nio.charset.StandardCharsets.UTF_8;
import static org.apache.tika.TikaTest.assertNotContained;
@@ -37,14 +37,16 @@ import org.apache.cxf.jaxrs.JAXRSServerFactoryBean;
import org.apache.cxf.jaxrs.client.WebClient;
import org.apache.cxf.jaxrs.ext.multipart.Attachment;
import org.apache.cxf.jaxrs.lifecycle.SingletonResourceProvider;
+import org.apache.tika.TikaTest;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.OfficeOpenXMLExtended;
import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.metadata.serialization.JsonMetadataList;
import org.apache.tika.sax.AbstractRecursiveParserWrapperHandler;
import org.apache.tika.sax.RecursiveParserWrapperHandler;
-import org.apache.tika.server.resource.RecursiveMetadataResource;
-import org.apache.tika.server.writer.MetadataListMessageBodyWriter;
+import org.apache.tika.server.core.CXFTestBase;
+import org.apache.tika.server.core.resource.RecursiveMetadataResource;
+import org.apache.tika.server.core.writer.MetadataListMessageBodyWriter;
import org.junit.Test;
public class RecursiveMetadataResourceTest extends CXFTestBase {
@@ -57,7 +59,7 @@ public class RecursiveMetadataResourceTest extends CXFTestBase {
private static final String UNPARSEABLE_PATH = "/somethingOrOther";
private static final String SLASH = "/";
- private static final String TEST_RECURSIVE_DOC = "test_recursive_embedded.docx";
+ private static final String TEST_RECURSIVE_DOC = "test-documents/test_recursive_embedded.docx";
@Override
protected void setUpResources(JAXRSServerFactoryBean sf) {
@@ -380,7 +382,7 @@ public class RecursiveMetadataResourceTest extends CXFTestBase {
assertEquals("true", metadataList.get(6).get(AbstractRecursiveParserWrapperHandler.WRITE_LIMIT_REACHED));
assertContains("When in the Course of human events it becomes necessary for one people",
metadataList.get(6).get(AbstractRecursiveParserWrapperHandler.TIKA_CONTENT));
- assertNotContained("to dissolve",
+ TikaTest.assertNotContained("to dissolve",
metadataList.get(6).get(AbstractRecursiveParserWrapperHandler.TIKA_CONTENT));
}
diff --git a/tika-server/src/test/java/org/apache/tika/server/TikaDetectorsTest.java b/tika-server/tika-server-classic/src/test/java/org/apache/tika/server/classic/TikaDetectorsTest.java
similarity index 97%
rename from tika-server/src/test/java/org/apache/tika/server/TikaDetectorsTest.java
rename to tika-server/tika-server-classic/src/test/java/org/apache/tika/server/classic/TikaDetectorsTest.java
index 93e049a..8d78ba5 100644
--- a/tika-server/src/test/java/org/apache/tika/server/TikaDetectorsTest.java
+++ b/tika-server/tika-server-classic/src/test/java/org/apache/tika/server/classic/TikaDetectorsTest.java
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package org.apache.tika.server;
+package org.apache.tika.server.classic;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
@@ -35,7 +35,8 @@ import org.apache.tika.detect.microsoft.POIFSContainerDetector;
import org.apache.tika.detect.zip.DefaultZipContainerDetector;
import org.apache.tika.mime.MimeTypes;
-import org.apache.tika.server.resource.TikaDetectors;
+import org.apache.tika.server.core.CXFTestBase;
+import org.apache.tika.server.core.resource.TikaDetectors;
import org.gagravarr.tika.OggDetector;
import org.junit.Test;
diff --git a/tika-server/src/test/java/org/apache/tika/server/TikaMimeTypesTest.java b/tika-server/tika-server-classic/src/test/java/org/apache/tika/server/classic/TikaMimeTypesTest.java
similarity index 70%
copy from tika-server/src/test/java/org/apache/tika/server/TikaMimeTypesTest.java
copy to tika-server/tika-server-classic/src/test/java/org/apache/tika/server/classic/TikaMimeTypesTest.java
index 6b2be33..028c68c 100644
--- a/tika-server/src/test/java/org/apache/tika/server/TikaMimeTypesTest.java
+++ b/tika-server/tika-server-classic/src/test/java/org/apache/tika/server/classic/TikaMimeTypesTest.java
@@ -14,32 +14,31 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
-package org.apache.tika.server;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertTrue;
-
-import javax.ws.rs.core.Response;
-
-import java.io.InputStream;
-import java.util.List;
-import java.util.Map;
+package org.apache.tika.server.classic;
import com.google.gson.Gson;
import com.google.gson.GsonBuilder;
import org.apache.cxf.jaxrs.JAXRSServerFactoryBean;
import org.apache.cxf.jaxrs.client.WebClient;
import org.apache.cxf.jaxrs.lifecycle.SingletonResourceProvider;
-import org.apache.tika.server.resource.TikaMimeTypes;
+import org.apache.tika.server.core.CXFTestBase;
+import org.apache.tika.server.core.resource.TikaMimeTypes;
import org.junit.Test;
-public class TikaMimeTypesTest extends CXFTestBase {
+import javax.ws.rs.core.Response;
+import java.io.InputStream;
+import java.util.List;
+import java.util.Map;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+public class TikaMimeTypesTest extends CXFTestBase {
private static final Gson GSON = new GsonBuilder().create();
private static final String MIMETYPES_PATH = "/mime-types";
+
@Override
protected void setUpResources(JAXRSServerFactoryBean sf) {
sf.setResourceClasses(TikaMimeTypes.class);
@@ -54,45 +53,6 @@ public class TikaMimeTypesTest extends CXFTestBase {
}
@Test
- public void testGetPlainText() throws Exception {
- Response response = WebClient
- .create(endPoint + MIMETYPES_PATH)
- .type("text/plain")
- .accept("text/plain")
- .get();
-
- String text = getStringFromInputStream((InputStream) response.getEntity());
- assertContains("text/plain", text);
- assertContains("application/xml", text);
- assertContains("video/x-ogm", text);
-
- assertContains("supertype: video/ogg", text);
-
- assertContains("alias: image/x-ms-bmp", text);
- }
-
- @Test
- public void testGetHTML() throws Exception {
- Response response = WebClient
- .create(endPoint + MIMETYPES_PATH)
- .type("text/html")
- .accept("text/html")
- .get();
-
- String text = getStringFromInputStream((InputStream) response.getEntity());
- assertContains("text/plain", text);
- assertContains("application/xml", text);
- assertContains("video/x-ogm", text);
-
- assertContains("<h2>text/plain", text);
- assertContains("name=\"text/plain", text);
-
- assertContains("Super Type: <a href=\"#video/ogg\">video/ogg", text);
-
- assertContains("Alias: image/x-ms-bmp", text);
- }
-
- @Test
@SuppressWarnings("unchecked")
public void testGetJSON() throws Exception {
Response response = WebClient
@@ -126,4 +86,5 @@ public class TikaMimeTypesTest extends CXFTestBase {
assertEquals("video/ogg", ogm.get("supertype"));
assertEquals("org.gagravarr.tika.OggParser", ogm.get("parser"));
}
+
}
diff --git a/tika-server/src/test/java/org/apache/tika/server/TikaParsersTest.java b/tika-server/tika-server-classic/src/test/java/org/apache/tika/server/classic/TikaParsersTest.java
similarity index 98%
rename from tika-server/src/test/java/org/apache/tika/server/TikaParsersTest.java
rename to tika-server/tika-server-classic/src/test/java/org/apache/tika/server/classic/TikaParsersTest.java
index cd3b083..d1d093e 100644
--- a/tika-server/src/test/java/org/apache/tika/server/TikaParsersTest.java
+++ b/tika-server/tika-server-classic/src/test/java/org/apache/tika/server/classic/TikaParsersTest.java
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package org.apache.tika.server;
+package org.apache.tika.server.classic;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
@@ -32,9 +32,9 @@ import org.apache.cxf.jaxrs.JAXRSServerFactoryBean;
import org.apache.cxf.jaxrs.client.WebClient;
import org.apache.cxf.jaxrs.lifecycle.SingletonResourceProvider;
import org.apache.tika.parser.microsoft.ooxml.OOXMLParser;
-import org.apache.tika.parser.pdf.PDFParser;
import org.apache.tika.parser.pkg.PackageParser;
-import org.apache.tika.server.resource.TikaParsers;
+import org.apache.tika.server.core.CXFTestBase;
+import org.apache.tika.server.core.resource.TikaParsers;
import org.gagravarr.tika.OpusParser;
import org.junit.Test;
diff --git a/tika-server/src/test/java/org/apache/tika/server/TikaResourceTest.java b/tika-server/tika-server-classic/src/test/java/org/apache/tika/server/classic/TikaResourceTest.java
similarity index 84%
rename from tika-server/src/test/java/org/apache/tika/server/TikaResourceTest.java
rename to tika-server/tika-server-classic/src/test/java/org/apache/tika/server/classic/TikaResourceTest.java
index 81e3ed5..84b57f5 100644
--- a/tika-server/src/test/java/org/apache/tika/server/TikaResourceTest.java
+++ b/tika-server/tika-server-classic/src/test/java/org/apache/tika/server/classic/TikaResourceTest.java
@@ -15,11 +15,10 @@
* limitations under the License.
*/
-package org.apache.tika.server;
+package org.apache.tika.server.classic;
import org.apache.commons.codec.binary.Base64;
import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream;
-import org.apache.commons.io.IOUtils;
import org.apache.cxf.attachment.AttachmentUtil;
import org.apache.cxf.jaxrs.JAXRSServerFactoryBean;
import org.apache.cxf.jaxrs.client.WebClient;
@@ -27,20 +26,22 @@ import org.apache.cxf.jaxrs.ext.multipart.Attachment;
import org.apache.cxf.jaxrs.lifecycle.SingletonResourceProvider;
import org.apache.tika.parser.ocr.TesseractOCRConfig;
import org.apache.tika.parser.ocr.TesseractOCRParser;
-import org.apache.tika.server.resource.TikaResource;
+import org.apache.tika.server.classic.config.PDFServerConfig;
+import org.apache.tika.server.classic.config.TesseractServerConfig;
+import org.apache.tika.server.core.CXFTestBase;
+import org.apache.tika.server.core.TikaServerParseExceptionMapper;
+import org.apache.tika.server.core.config.DocumentSelectorConfig;
+import org.apache.tika.server.core.config.PasswordProviderConfig;
+import org.apache.tika.server.core.resource.TikaResource;
import org.junit.Ignore;
import org.junit.Test;
import javax.ws.rs.ProcessingException;
-import javax.ws.rs.core.MultivaluedMap;
import javax.ws.rs.core.Response;
-import java.io.ByteArrayInputStream;
-import java.io.ByteArrayOutputStream;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
-import java.util.Set;
import static org.apache.cxf.helpers.HttpHeaderHelper.CONTENT_ENCODING;
import static org.junit.Assert.assertEquals;
@@ -48,9 +49,9 @@ import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
public class TikaResourceTest extends CXFTestBase {
- public static final String TEST_DOC = "test.doc";
- public static final String TEST_PASSWORD_PROTECTED = "password.xls";
- private static final String TEST_RECURSIVE_DOC = "test_recursive_embedded.docx";
+ public static final String TEST_DOC = "test-documents/test.doc";
+ public static final String TEST_PASSWORD_PROTECTED = "test-documents/password.xls";
+ private static final String TEST_RECURSIVE_DOC = "test-documents/test_recursive_embedded.docx";
private static final String TEST_OOM = "mock/fake_oom.xml";
private static final String STREAM_CLOSED_FAULT = "java.io.IOException: Stream Closed";
@@ -140,7 +141,7 @@ public class TikaResourceTest extends CXFTestBase {
//boilerpipe
Response response = WebClient.create(endPoint + TIKA_PATH + "/main")
.accept("text/plain")
- .put(ClassLoader.getSystemResourceAsStream("testHTML.html"));
+ .put(ClassLoader.getSystemResourceAsStream("test-documents/testHTML.html"));
String responseMsg = getStringFromInputStream((InputStream) response
.getEntity());
assertTrue(responseMsg.contains("Title : Test Indexation Html"));
@@ -151,7 +152,7 @@ public class TikaResourceTest extends CXFTestBase {
public void testTextMainMultipart() throws Exception {
//boilerpipe
Attachment attachmentPart =
- new Attachment("myhtml", "text/html", ClassLoader.getSystemResourceAsStream("testHTML.html"));
+ new Attachment("myhtml", "text/html", ClassLoader.getSystemResourceAsStream("test-documents/testHTML.html"));
Response response = WebClient.create(endPoint + TIKA_PATH+"/form/main")
@@ -164,22 +165,14 @@ public class TikaResourceTest extends CXFTestBase {
assertFalse(responseMsg.contains("Indexation du fichier"));
}
- @Test
- public void testApplicationWadl() throws Exception {
- Response response = WebClient
- .create(endPoint + TIKA_PATH + "?_wadl")
- .accept("text/plain").get();
- String resp = getStringFromInputStream((InputStream) response
- .getEntity());
- assertTrue(resp.startsWith("<application"));
- }
+
@Test
public void testPasswordXLS() throws Exception {
Response response = WebClient.create(endPoint + TIKA_PATH)
.type("application/vnd.ms-excel")
.accept("text/plain")
- .put(ClassLoader.getSystemResourceAsStream("password.xls"));
+ .put(ClassLoader.getSystemResourceAsStream("test-documents/password.xls"));
assertEquals(UNPROCESSEABLE, response.getStatus());
}
@@ -204,7 +197,7 @@ public class TikaResourceTest extends CXFTestBase {
Response response = WebClient.create(endPoint + TIKA_PATH)
.type("application/vnd.ms-excel")
.accept("text/html")
- .put(ClassLoader.getSystemResourceAsStream("password.xls"));
+ .put(ClassLoader.getSystemResourceAsStream("test-documents/password.xls"));
assertEquals(UNPROCESSEABLE, response.getStatus());
}
@@ -225,7 +218,7 @@ public class TikaResourceTest extends CXFTestBase {
Response response = WebClient.create(endPoint + TIKA_PATH)
.type("application/vnd.ms-excel")
.accept("text/xml")
- .put(ClassLoader.getSystemResourceAsStream("password.xls"));
+ .put(ClassLoader.getSystemResourceAsStream("test-documents/password.xls"));
assertEquals(UNPROCESSEABLE, response.getStatus());
}
@@ -281,7 +274,7 @@ public class TikaResourceTest extends CXFTestBase {
Response response = WebClient.create(endPoint + TIKA_PATH)
.type("application/rtf")
.accept("text/plain")
- .put(ClassLoader.getSystemResourceAsStream("testRTF_npeFromWMFInTikaServer.rtf"));
+ .put(ClassLoader.getSystemResourceAsStream("test-documents/testRTF_npeFromWMFInTikaServer.rtf"));
String responseMsg = getStringFromInputStream((InputStream) response
.getEntity());
assertTrue(responseMsg.contains("Example text"));
@@ -296,11 +289,11 @@ public class TikaResourceTest extends CXFTestBase {
Response response = WebClient.create(endPoint + TIKA_PATH)
.accept("text/plain")
- .header(TikaResource.X_TIKA_PDF_HEADER_PREFIX+"OcrStrategy", "ocr_only")
- .header(TikaResource.X_TIKA_OCR_HEADER_PREFIX+"Language", "eng+fra")
- .header(TikaResource.X_TIKA_OCR_HEADER_PREFIX+"MinFileSizeToOcr", "10")
- .header(TikaResource.X_TIKA_OCR_HEADER_PREFIX+"MaxFileSizeToOcr", "1000000000")
- .put(ClassLoader.getSystemResourceAsStream("testOCR.pdf"));
+ .header(PDFServerConfig.X_TIKA_PDF_HEADER_PREFIX+"OcrStrategy", "ocr_only")
+ .header(TesseractServerConfig.X_TIKA_OCR_HEADER_PREFIX+"Language", "eng+fra")
+ .header(TesseractServerConfig.X_TIKA_OCR_HEADER_PREFIX+"MinFileSizeToOcr", "10")
+ .header(TesseractServerConfig.X_TIKA_OCR_HEADER_PREFIX+"MaxFileSizeToOcr", "1000000000")
+ .put(ClassLoader.getSystemResourceAsStream("test-documents/testOCR.pdf"));
String responseMsg = getStringFromInputStream((InputStream) response
.getEntity());
assertContains("Happy New Year 2003!", responseMsg);
@@ -316,8 +309,8 @@ public class TikaResourceTest extends CXFTestBase {
Response response = WebClient.create(endPoint + TIKA_PATH)
.type("application/pdf")
.accept("text/plain")
- .header(TikaResource.X_TIKA_PDF_HEADER_PREFIX+"OcrStrategy", "no_ocr")
- .put(ClassLoader.getSystemResourceAsStream("testOCR.pdf"));
+ .header(PDFServerConfig.X_TIKA_PDF_HEADER_PREFIX+"OcrStrategy", "no_ocr")
+ .put(ClassLoader.getSystemResourceAsStream("test-documents/testOCR.pdf"));
String responseMsg = getStringFromInputStream((InputStream) response
.getEntity());
assertTrue(responseMsg.trim().equals(""));
@@ -325,8 +318,8 @@ public class TikaResourceTest extends CXFTestBase {
response = WebClient.create(endPoint + TIKA_PATH)
.type("application/pdf")
.accept("text/plain")
- .header(TikaResource.X_TIKA_PDF_HEADER_PREFIX+"OcrStrategy", "ocr_only")
- .put(ClassLoader.getSystemResourceAsStream("testOCR.pdf"));
+ .header(PDFServerConfig.X_TIKA_PDF_HEADER_PREFIX+"OcrStrategy", "ocr_only")
+ .put(ClassLoader.getSystemResourceAsStream("test-documents/testOCR.pdf"));
responseMsg = getStringFromInputStream((InputStream) response
.getEntity());
assertContains("Happy New Year 2003!", responseMsg);
@@ -335,8 +328,8 @@ public class TikaResourceTest extends CXFTestBase {
response = WebClient.create(endPoint + TIKA_PATH)
.type("application/pdf")
.accept("text/plain")
- .header(TikaResource.X_TIKA_PDF_HEADER_PREFIX + "OcrStrategy", "non-sense-value")
- .put(ClassLoader.getSystemResourceAsStream("testOCR.pdf"));
+ .header(PDFServerConfig.X_TIKA_PDF_HEADER_PREFIX + "OcrStrategy", "non-sense-value")
+ .put(ClassLoader.getSystemResourceAsStream("test-documents/testOCR.pdf"));
assertEquals(400, response.getStatus());
}
@@ -347,7 +340,7 @@ public class TikaResourceTest extends CXFTestBase {
Response response = WebClient.create(endPoint + TIKA_PATH)
.type("application/pdf")
.accept("text/plain")
- .put(ClassLoader.getSystemResourceAsStream("testPDFTwoTextBoxes.pdf"));
+ .put(ClassLoader.getSystemResourceAsStream("test-documents/testPDFTwoTextBoxes.pdf"));
String responseMsg = getStringFromInputStream((InputStream) response
.getEntity());
responseMsg = responseMsg.replaceAll("[\r\n ]+", " ").trim();
@@ -357,8 +350,8 @@ public class TikaResourceTest extends CXFTestBase {
response = WebClient.create(endPoint + TIKA_PATH)
.type("application/pdf")
.accept("text/plain")
- .header(TikaResource.X_TIKA_PDF_HEADER_PREFIX+"sortByPosition", "false")
- .put(ClassLoader.getSystemResourceAsStream("testPDFTwoTextBoxes.pdf"));
+ .header(PDFServerConfig.X_TIKA_PDF_HEADER_PREFIX+"sortByPosition", "false")
+ .put(ClassLoader.getSystemResourceAsStream("test-documents/testPDFTwoTextBoxes.pdf"));
responseMsg = getStringFromInputStream((InputStream) response
.getEntity());
responseMsg = responseMsg.replaceAll("[\r\n ]+", " ").trim();
@@ -368,7 +361,7 @@ public class TikaResourceTest extends CXFTestBase {
response = WebClient.create(endPoint + TIKA_PATH)
.type("application/pdf")
.accept("text/plain")
- .put(ClassLoader.getSystemResourceAsStream("testPDFTwoTextBoxes.pdf"));
+ .put(ClassLoader.getSystemResourceAsStream("test-documents/testPDFTwoTextBoxes.pdf"));
responseMsg = getStringFromInputStream((InputStream) response
.getEntity());
responseMsg = responseMsg.replaceAll("[\r\n ]+", " ").trim();
@@ -384,7 +377,7 @@ public class TikaResourceTest extends CXFTestBase {
Attachment attachmentPart = new Attachment(
"my-docx-file",
"application/vnd.openxmlformats-officedocument.wordprocessingml.document",
- ClassLoader.getSystemResourceAsStream("2pic.docx")
+ ClassLoader.getSystemResourceAsStream("test-documents/2pic.docx")
);
Response response = WebClient.create(endPoint + TIKA_PATH + "/form")
@@ -407,10 +400,10 @@ public class TikaResourceTest extends CXFTestBase {
response = WebClient.create(endPoint + TIKA_PATH)
.type("application/pdf")
.accept("text/plain")
- .header(TikaResource.X_TIKA_OCR_HEADER_PREFIX +
+ .header(TesseractServerConfig.X_TIKA_OCR_HEADER_PREFIX +
"tesseractPath",
"C://tmp//hello.bat\u0000")
- .put(ClassLoader.getSystemResourceAsStream("testOCR.pdf"));
+ .put(ClassLoader.getSystemResourceAsStream("test-documents/testOCR.pdf"));
assertEquals(400, response.getStatus());
} catch (ProcessingException e) {
//can't tell why this intermittently happens. :(
@@ -420,10 +413,10 @@ public class TikaResourceTest extends CXFTestBase {
response = WebClient.create(endPoint + TIKA_PATH)
.type("application/pdf")
.accept("text/plain")
- .header(TikaResource.X_TIKA_OCR_HEADER_PREFIX +
+ .header(TesseractServerConfig.X_TIKA_OCR_HEADER_PREFIX +
"tesseractPath",
"bogus path")
- .put(ClassLoader.getSystemResourceAsStream("testOCR.pdf"));
+ .put(ClassLoader.getSystemResourceAsStream("test-documents/testOCR.pdf"));
assertEquals(200, response.getStatus());
}
@@ -432,10 +425,10 @@ public class TikaResourceTest extends CXFTestBase {
Response response = WebClient.create(endPoint + TIKA_PATH)
.type("application/pdf")
.accept("text/plain")
- .header(TikaResource.X_TIKA_OCR_HEADER_PREFIX +
+ .header(TesseractServerConfig.X_TIKA_OCR_HEADER_PREFIX +
"trustedPageSeparator",
"\u0020")
- .put(ClassLoader.getSystemResourceAsStream("testOCR.pdf"));
+ .put(ClassLoader.getSystemResourceAsStream("test-documents/testOCR.pdf"));
assertEquals(400, response.getStatus());
}
@@ -445,38 +438,15 @@ public class TikaResourceTest extends CXFTestBase {
Response response = WebClient.create(endPoint + TIKA_PATH)
.type("application/pdf")
.accept("text/plain")
- .header(TikaResource.X_TIKA_PDF_HEADER_PREFIX +
+ .header(PDFServerConfig.X_TIKA_PDF_HEADER_PREFIX +
"averageCharTolerance",
"2.0")
- .put(ClassLoader.getSystemResourceAsStream("testOCR.pdf"));
+ .put(ClassLoader.getSystemResourceAsStream("test-documents/testOCR.pdf"));
assertEquals(200, response.getStatus());
}
- @Test
- public void testOOMInLegacyMode() throws Exception {
-
- Response response = null;
- try {
- response = WebClient
- .create(endPoint + TIKA_PATH)
- .accept("text/plain")
- .put(ClassLoader
- .getSystemResourceAsStream(TEST_OOM));
- } catch (Exception e) {
- //oom may or may not cause an exception depending
- //on the timing
- }
-
- response = WebClient
- .create(endPoint + TIKA_PATH)
- .accept("text/plain")
- .put(ClassLoader
- .getSystemResourceAsStream(TEST_RECURSIVE_DOC));
- String responseMsg = getStringFromInputStream((InputStream) response.getEntity());
- assertContains("plundered our seas", responseMsg);
- }
@Test
public void testUnicodePasswordProtectedSpaces() throws Exception {
@@ -485,8 +455,8 @@ public class TikaResourceTest extends CXFTestBase {
final String encoded = new Base64().encodeAsString(password.getBytes(StandardCharsets.UTF_8));
Response response = WebClient.create(endPoint + TIKA_PATH)
.accept("text/plain")
- .header(TikaResource.PASSWORD_BASE64_UTF8, encoded)
- .put(ClassLoader.getSystemResourceAsStream("testPassword4Spaces.pdf"));
+ .header(PasswordProviderConfig.PASSWORD_BASE64_UTF8, encoded)
+ .put(ClassLoader.getSystemResourceAsStream("test-documents/testPassword4Spaces.pdf"));
String responseMsg = getStringFromInputStream((InputStream) response
.getEntity());
assertContains("Just some text.", responseMsg);
@@ -499,8 +469,8 @@ public class TikaResourceTest extends CXFTestBase {
final String encoded = new Base64().encodeAsString(password.getBytes(StandardCharsets.UTF_8));
Response response = WebClient.create(endPoint + TIKA_PATH)
.accept("text/plain")
- .header(TikaResource.PASSWORD_BASE64_UTF8, encoded)
- .put(ClassLoader.getSystemResourceAsStream("testUnicodePassword.pdf"));
+ .header(PasswordProviderConfig.PASSWORD_BASE64_UTF8, encoded)
+ .put(ClassLoader.getSystemResourceAsStream("test-documents/testUnicodePassword.pdf"));
String responseMsg = getStringFromInputStream((InputStream) response
.getEntity());
assertContains("Just some text.", responseMsg);
@@ -511,14 +481,14 @@ public class TikaResourceTest extends CXFTestBase {
public void testSkipEmbedded() throws Exception {
Response response = WebClient.create(endPoint + TIKA_PATH)
.accept("text/plain")
- .header(TikaResource.X_TIKA_SKIP_EMBEDDED_HEADER, "false")
+ .header(DocumentSelectorConfig.X_TIKA_SKIP_EMBEDDED_HEADER, "false")
.put(ClassLoader.getSystemResourceAsStream(TEST_RECURSIVE_DOC));
String responseMsg = getStringFromInputStream((InputStream) response.getEntity());
assertContains("embed4.txt", responseMsg);
response = WebClient.create(endPoint + TIKA_PATH)
.accept("text/plain")
- .header(TikaResource.X_TIKA_SKIP_EMBEDDED_HEADER, "true")
+ .header(DocumentSelectorConfig.X_TIKA_SKIP_EMBEDDED_HEADER, "true")
.put(ClassLoader.getSystemResourceAsStream(TEST_RECURSIVE_DOC));
responseMsg = getStringFromInputStream((InputStream) response.getEntity());
assertNotFound("embed4.txt", responseMsg);
diff --git a/tika-server/src/test/java/org/apache/tika/server/UnpackerResourceTest.java b/tika-server/tika-server-classic/src/test/java/org/apache/tika/server/classic/UnpackerResourceTest.java
similarity index 80%
rename from tika-server/src/test/java/org/apache/tika/server/UnpackerResourceTest.java
rename to tika-server/tika-server-classic/src/test/java/org/apache/tika/server/classic/UnpackerResourceTest.java
index 039d28f..4c5fae9 100644
--- a/tika-server/src/test/java/org/apache/tika/server/UnpackerResourceTest.java
+++ b/tika-server/tika-server-classic/src/test/java/org/apache/tika/server/classic/UnpackerResourceTest.java
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package org.apache.tika.server;
+package org.apache.tika.server.classic;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
@@ -35,10 +35,12 @@ import org.apache.cxf.jaxrs.client.WebClient;
import org.apache.cxf.jaxrs.lifecycle.SingletonResourceProvider;
import org.apache.tika.parser.ocr.TesseractOCRConfig;
import org.apache.tika.parser.ocr.TesseractOCRParser;
-import org.apache.tika.server.resource.TikaResource;
-import org.apache.tika.server.writer.TarWriter;
-import org.apache.tika.server.resource.UnpackerResource;
-import org.apache.tika.server.writer.ZipWriter;
+import org.apache.tika.server.classic.config.PDFServerConfig;
+import org.apache.tika.server.core.CXFTestBase;
+import org.apache.tika.server.core.TikaServerParseExceptionMapper;
+import org.apache.tika.server.core.writer.TarWriter;
+import org.apache.tika.server.core.resource.UnpackerResource;
+import org.apache.tika.server.core.writer.ZipWriter;
import org.junit.Assume;
import org.junit.Test;
@@ -47,7 +49,7 @@ public class UnpackerResourceTest extends CXFTestBase {
private static final String UNPACKER_PATH = BASE_PATH + "";
private static final String ALL_PATH = BASE_PATH + "/all";
- private static final String TEST_DOC_WAV = "Doc1_ole.doc";
+ private static final String TEST_DOC_WAV = "test-documents/Doc1_ole.doc";
private static final String WAV1_MD5 = "bdd0a78a54968e362445364f95d8dc96";
private static final String WAV1_NAME = "_1310388059/MSj00974840000[1].wav";
private static final String WAV2_MD5 = "3bbd42fb1ac0e46a95350285f16d9596";
@@ -57,7 +59,7 @@ public class UnpackerResourceTest extends CXFTestBase {
private static final String JPG_MD5 = XSL_IMAGE1_MD5;
private static final String JPG2_NAME = "image2.jpg";
private static final String JPG2_MD5 = "b27a41d12c646d7fc4f3826cf8183c68";
- private static final String TEST_DOCX_IMAGE = "2pic.docx";
+ private static final String TEST_DOCX_IMAGE = "test-documents/2pic.docx";
private static final String DOCX_IMAGE1_MD5 = "5516590467b069fa59397432677bad4d";
private static final String DOCX_IMAGE2_MD5 = "a5dd81567427070ce0a2ff3e3ef13a4c";
private static final String DOCX_IMAGE1_NAME = "image1.jpeg";
@@ -89,7 +91,7 @@ public class UnpackerResourceTest extends CXFTestBase {
@Test
public void testDocWAV() throws Exception {
- Response response = WebClient.create(endPoint + UNPACKER_PATH)
+ Response response = WebClient.create(CXFTestBase.endPoint + UNPACKER_PATH)
.type(APPLICATION_MSWORD).accept("application/zip")
.put(ClassLoader.getSystemResourceAsStream(TEST_DOC_WAV));
@@ -101,7 +103,7 @@ public class UnpackerResourceTest extends CXFTestBase {
@Test
public void testDocWAVText() throws Exception {
- Response response = WebClient.create(endPoint + ALL_PATH)
+ Response response = WebClient.create(CXFTestBase.endPoint + ALL_PATH)
.type(APPLICATION_MSWORD).accept("application/zip")
.put(ClassLoader.getSystemResourceAsStream(TEST_DOC_WAV));
@@ -113,7 +115,7 @@ public class UnpackerResourceTest extends CXFTestBase {
@Test
public void testDocPicture() throws Exception {
- Response response = WebClient.create(endPoint + UNPACKER_PATH)
+ Response response = WebClient.create(CXFTestBase.endPoint + UNPACKER_PATH)
.type(APPLICATION_MSWORD).accept("application/zip")
.put(ClassLoader.getSystemResourceAsStream(TEST_DOC_WAV));
@@ -124,9 +126,9 @@ public class UnpackerResourceTest extends CXFTestBase {
@Test
public void testDocPictureNoOle() throws Exception {
- Response response = WebClient.create(endPoint + UNPACKER_PATH)
+ Response response = WebClient.create(CXFTestBase.endPoint + UNPACKER_PATH)
.type(APPLICATION_MSWORD).accept("application/zip")
- .put(ClassLoader.getSystemResourceAsStream("2pic.doc"));
+ .put(ClassLoader.getSystemResourceAsStream("test-documents/2pic.doc"));
Map<String, String> data = readZipArchive((InputStream) response.getEntity());
assertEquals(JPG2_MD5, data.get(JPG2_NAME));
@@ -134,7 +136,7 @@ public class UnpackerResourceTest extends CXFTestBase {
@Test
public void testImageDOCX() throws Exception {
- Response response = WebClient.create(endPoint + UNPACKER_PATH)
+ Response response = WebClient.create(CXFTestBase.endPoint + UNPACKER_PATH)
.accept("application/zip").put(
ClassLoader.getSystemResourceAsStream(TEST_DOCX_IMAGE));
@@ -145,7 +147,7 @@ public class UnpackerResourceTest extends CXFTestBase {
@Test
public void test204() throws Exception {
- Response response = WebClient.create(endPoint + UNPACKER_PATH)
+ Response response = WebClient.create(CXFTestBase.endPoint + UNPACKER_PATH)
.type("xxx/xxx")
.accept("*/*")
.put(ClassLoader.getSystemResourceAsStream(TEST_DOC_WAV));
@@ -155,8 +157,8 @@ public class UnpackerResourceTest extends CXFTestBase {
@Test
public void testExeDOCX() throws Exception {
- String TEST_DOCX_EXE = "2exe.docx";
- Response response = WebClient.create(endPoint + UNPACKER_PATH)
+ String TEST_DOCX_EXE = "test-documents/2exe.docx";
+ Response response = WebClient.create(CXFTestBase.endPoint + UNPACKER_PATH)
.accept("application/zip")
.put(ClassLoader.getSystemResourceAsStream(TEST_DOCX_EXE));
@@ -168,9 +170,9 @@ public class UnpackerResourceTest extends CXFTestBase {
@Test
public void testImageXSL() throws Exception {
- Response response = WebClient.create(endPoint + UNPACKER_PATH)
+ Response response = WebClient.create(CXFTestBase.endPoint + UNPACKER_PATH)
.accept("application/zip")
- .put(ClassLoader.getSystemResourceAsStream("pic.xls"));
+ .put(ClassLoader.getSystemResourceAsStream("test-documents/pic.xls"));
Map<String, String> data = readZipArchive((InputStream) response.getEntity());
assertEquals(XSL_IMAGE1_MD5, data.get("0.jpg"));
@@ -179,7 +181,7 @@ public class UnpackerResourceTest extends CXFTestBase {
@Test
public void testTarDocPicture() throws Exception {
- Response response = WebClient.create(endPoint + UNPACKER_PATH)
+ Response response = WebClient.create(CXFTestBase.endPoint + UNPACKER_PATH)
.type(APPLICATION_MSWORD).accept("application/x-tar")
.put(ClassLoader.getSystemResourceAsStream(TEST_DOC_WAV));
@@ -190,10 +192,10 @@ public class UnpackerResourceTest extends CXFTestBase {
@Test
public void testText() throws Exception {
- Response response = WebClient.create(endPoint + ALL_PATH)
+ Response response = WebClient.create(CXFTestBase.endPoint + ALL_PATH)
.header(CONTENT_TYPE, APPLICATION_XML)
.accept("application/zip")
- .put(ClassLoader.getSystemResourceAsStream("test.doc"));
+ .put(ClassLoader.getSystemResourceAsStream("test-documents/test.doc"));
String responseMsg = readArchiveText((InputStream) response.getEntity());
assertNotNull(responseMsg);
@@ -202,10 +204,10 @@ public class UnpackerResourceTest extends CXFTestBase {
@Test
public void testPDFImages() throws Exception {
- Response response = WebClient.create(endPoint + UNPACKER_PATH)
- .header(TikaResource.X_TIKA_PDF_HEADER_PREFIX+"ExtractInlineImages", "true")
+ Response response = WebClient.create(CXFTestBase.endPoint + UNPACKER_PATH)
+ .header(PDFServerConfig.X_TIKA_PDF_HEADER_PREFIX+"ExtractInlineImages", "true")
.accept("application/zip")
- .put(ClassLoader.getSystemResourceAsStream("testOCR.pdf"));
+ .put(ClassLoader.getSystemResourceAsStream("test-documents/testOCR.pdf"));
Map<String, String> results = readZipArchive((InputStream)response.getEntity());
assertTrue(results.containsKey("image0.png"));
String md5 = results.get("image0.png");
@@ -220,11 +222,11 @@ public class UnpackerResourceTest extends CXFTestBase {
public void testPDFRenderOCR() throws Exception {
Assume.assumeTrue( new TesseractOCRParser().hasTesseract(new TesseractOCRConfig()));
- Response response = WebClient.create(endPoint + ALL_PATH)
- .header(TikaResource.X_TIKA_PDF_HEADER_PREFIX+"ocrStrategy", "ocr_only")
+ Response response = WebClient.create(CXFTestBase.endPoint + ALL_PATH)
+ .header(PDFServerConfig.X_TIKA_PDF_HEADER_PREFIX+"ocrStrategy", "ocr_only")
.accept("application/zip")
- .put(ClassLoader.getSystemResourceAsStream("testOCR.pdf"));
+ .put(ClassLoader.getSystemResourceAsStream("test-documents/testOCR.pdf"));
String txt = readArchiveText((InputStream)response.getEntity());
- assertContains("Happy New Year", txt);
+ CXFTestBase.assertContains("Happy New Year", txt);
}
}
diff --git a/tika-server/src/test/resources/org/apache/tika/server/TIKA-3137-include.xml b/tika-server/tika-server-classic/src/test/resources/config/TIKA-3137-include.xml
similarity index 100%
rename from tika-server/src/test/resources/org/apache/tika/server/TIKA-3137-include.xml
rename to tika-server/tika-server-classic/src/test/resources/config/TIKA-3137-include.xml
diff --git a/tika-server/src/test/resources/log4j.properties b/tika-server/tika-server-classic/src/test/resources/log4j.properties
similarity index 97%
copy from tika-server/src/test/resources/log4j.properties
copy to tika-server/tika-server-classic/src/test/resources/log4j.properties
index 6aa860a..c3676a2 100644
--- a/tika-server/src/test/resources/log4j.properties
+++ b/tika-server/tika-server-classic/src/test/resources/log4j.properties
@@ -14,7 +14,7 @@
# limitations under the License.
#info,debug, error,fatal ...
-log4j.rootLogger=info,stderr
+log4j.rootLogger=warn,stderr
#console
log4j.appender.stderr=org.apache.log4j.ConsoleAppender
diff --git a/tika-server/src/test/resources/2exe.docx b/tika-server/tika-server-classic/src/test/resources/test-documents/2exe.docx
similarity index 100%
rename from tika-server/src/test/resources/2exe.docx
rename to tika-server/tika-server-classic/src/test/resources/test-documents/2exe.docx
diff --git a/tika-server/src/test/resources/2pic.doc b/tika-server/tika-server-classic/src/test/resources/test-documents/2pic.doc
similarity index 100%
rename from tika-server/src/test/resources/2pic.doc
rename to tika-server/tika-server-classic/src/test/resources/test-documents/2pic.doc
diff --git a/tika-server/src/test/resources/2pic.docx b/tika-server/tika-server-classic/src/test/resources/test-documents/2pic.docx
similarity index 100%
rename from tika-server/src/test/resources/2pic.docx
rename to tika-server/tika-server-classic/src/test/resources/test-documents/2pic.docx
diff --git a/tika-server/src/test/resources/CDEC_WEATHER_2010_03_02 b/tika-server/tika-server-classic/src/test/resources/test-documents/CDEC_WEATHER_2010_03_02
similarity index 100%
rename from tika-server/src/test/resources/CDEC_WEATHER_2010_03_02
rename to tika-server/tika-server-classic/src/test/resources/test-documents/CDEC_WEATHER_2010_03_02
diff --git a/tika-server/src/test/resources/Doc1_ole.doc b/tika-server/tika-server-classic/src/test/resources/test-documents/Doc1_ole.doc
similarity index 100%
rename from tika-server/src/test/resources/Doc1_ole.doc
rename to tika-server/tika-server-classic/src/test/resources/test-documents/Doc1_ole.doc
diff --git a/tika-server/src/test/resources/foo.csv b/tika-server/tika-server-classic/src/test/resources/test-documents/foo.csv
similarity index 100%
rename from tika-server/src/test/resources/foo.csv
rename to tika-server/tika-server-classic/src/test/resources/test-documents/foo.csv
diff --git a/tika-server/src/test/resources/password.xls b/tika-server/tika-server-classic/src/test/resources/test-documents/password.xls
similarity index 100%
rename from tika-server/src/test/resources/password.xls
rename to tika-server/tika-server-classic/src/test/resources/test-documents/password.xls
diff --git a/tika-server/src/test/resources/pic.xls b/tika-server/tika-server-classic/src/test/resources/test-documents/pic.xls
similarity index 100%
rename from tika-server/src/test/resources/pic.xls
rename to tika-server/tika-server-classic/src/test/resources/test-documents/pic.xls
diff --git a/tika-server/src/test/resources/pic.xlsx b/tika-server/tika-server-classic/src/test/resources/test-documents/pic.xlsx
similarity index 100%
rename from tika-server/src/test/resources/pic.xlsx
rename to tika-server/tika-server-classic/src/test/resources/test-documents/pic.xlsx
diff --git a/tika-server/src/test/resources/test.doc b/tika-server/tika-server-classic/src/test/resources/test-documents/test.doc
similarity index 100%
rename from tika-server/src/test/resources/test.doc
rename to tika-server/tika-server-classic/src/test/resources/test-documents/test.doc
diff --git a/tika-server/src/test/resources/testHTML.html b/tika-server/tika-server-classic/src/test/resources/test-documents/testHTML.html
similarity index 100%
rename from tika-server/src/test/resources/testHTML.html
rename to tika-server/tika-server-classic/src/test/resources/test-documents/testHTML.html
diff --git a/tika-server/src/test/resources/testOCR.pdf b/tika-server/tika-server-classic/src/test/resources/test-documents/testOCR.pdf
similarity index 100%
rename from tika-server/src/test/resources/testOCR.pdf
rename to tika-server/tika-server-classic/src/test/resources/test-documents/testOCR.pdf
diff --git a/tika-server/src/test/resources/testPDFTwoTextBoxes.pdf b/tika-server/tika-server-classic/src/test/resources/test-documents/testPDFTwoTextBoxes.pdf
similarity index 100%
rename from tika-server/src/test/resources/testPDFTwoTextBoxes.pdf
rename to tika-server/tika-server-classic/src/test/resources/test-documents/testPDFTwoTextBoxes.pdf
diff --git a/tika-server/src/test/resources/testPassword4Spaces.pdf b/tika-server/tika-server-classic/src/test/resources/test-documents/testPassword4Spaces.pdf
similarity index 100%
rename from tika-server/src/test/resources/testPassword4Spaces.pdf
rename to tika-server/tika-server-classic/src/test/resources/test-documents/testPassword4Spaces.pdf
diff --git a/tika-server/src/test/resources/testRTF_npeFromWMFInTikaServer.rtf b/tika-server/tika-server-classic/src/test/resources/test-documents/testRTF_npeFromWMFInTikaServer.rtf
similarity index 100%
rename from tika-server/src/test/resources/testRTF_npeFromWMFInTikaServer.rtf
rename to tika-server/tika-server-classic/src/test/resources/test-documents/testRTF_npeFromWMFInTikaServer.rtf
diff --git a/tika-server/src/test/resources/testUnicodePassword.pdf b/tika-server/tika-server-classic/src/test/resources/test-documents/testUnicodePassword.pdf
similarity index 100%
rename from tika-server/src/test/resources/testUnicodePassword.pdf
rename to tika-server/tika-server-classic/src/test/resources/test-documents/testUnicodePassword.pdf
diff --git a/tika-server/src/test/resources/test_recursive_embedded.docx b/tika-server/tika-server-classic/src/test/resources/test-documents/test_recursive_embedded.docx
similarity index 100%
rename from tika-server/src/test/resources/test_recursive_embedded.docx
rename to tika-server/tika-server-classic/src/test/resources/test-documents/test_recursive_embedded.docx
diff --git a/tika-server/src/test/resources/org/apache/tika/server/tika-config-for-server-tests.xml b/tika-server/tika-server-client/pom.xml
similarity index 59%
copy from tika-server/src/test/resources/org/apache/tika/server/tika-config-for-server-tests.xml
copy to tika-server/tika-server-client/pom.xml
index 8867655..227a358 100644
--- a/tika-server/src/test/resources/org/apache/tika/server/tika-config-for-server-tests.xml
+++ b/tika-server/tika-server-client/pom.xml
@@ -7,7 +7,7 @@
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
- http://www.apache.org/licenses/LICENSE-2.0
+ http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
@@ -15,15 +15,17 @@
See the License for the specific language governing permissions and
limitations under the License.
-->
-<properties>
- <parsers>
- <parser class="org.apache.tika.parser.DefaultParser">
- <parser-exclude class="org.apache.tika.parser.pdf.PDFParser"/>
- </parser>
- <parser class="org.apache.tika.parser.pdf.PDFParser">
- <params>
- <param name="sortByPosition" type="bool">true</param>
- </params>
- </parser>
- </parsers>
-</properties>
\ No newline at end of file
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+ <parent>
+ <artifactId>tika-server</artifactId>
+ <groupId>org.apache.tika</groupId>
+ <version>2.0.0-SNAPSHOT</version>
+ </parent>
+ <modelVersion>4.0.0</modelVersion>
+
+ <artifactId>tika-server-client</artifactId>
+
+
+</project>
\ No newline at end of file
diff --git a/tika-server/pom.xml b/tika-server/tika-server-core/pom.xml
similarity index 89%
copy from tika-server/pom.xml
copy to tika-server/tika-server-core/pom.xml
index 8aeb14a..bed8766 100644
--- a/tika-server/pom.xml
+++ b/tika-server/tika-server-core/pom.xml
@@ -1,3 +1,4 @@
+<?xml version="1.0" encoding="UTF-8"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
@@ -14,23 +15,18 @@
See the License for the specific language governing permissions and
limitations under the License.
-->
-<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
- xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
- <modelVersion>4.0.0</modelVersion>
-
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<parent>
+ <artifactId>tika-server</artifactId>
<groupId>org.apache.tika</groupId>
- <artifactId>tika-parent</artifactId>
<version>2.0.0-SNAPSHOT</version>
- <relativePath>../tika-parent/pom.xml</relativePath>
</parent>
- <artifactId>tika-server</artifactId>
- <name>Apache Tika server</name>
- <url>http://tika.apache.org/</url>
+ <modelVersion>4.0.0</modelVersion>
- <properties>
- </properties>
+ <artifactId>tika-server-core</artifactId>
<pluginRepositories>
<pluginRepository>
@@ -41,11 +37,7 @@
</pluginRepositories>
<dependencies>
- <dependency>
- <groupId>${project.groupId}</groupId>
- <artifactId>tika-parsers-classic-package</artifactId>
- <version>${project.version}</version>
- </dependency>
+
<dependency>
<groupId>${project.groupId}</groupId>
<artifactId>tika-translate</artifactId>
@@ -64,7 +56,17 @@
</dependency>
<dependency>
<groupId>${project.groupId}</groupId>
- <artifactId>tika-xmp</artifactId>
+ <artifactId>tika-parser-html-commons</artifactId>
+ <version>${project.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>${project.groupId}</groupId>
+ <artifactId>tika-parser-digest-commons</artifactId>
+ <version>${project.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>${project.groupId}</groupId>
+ <artifactId>tika-parser-zip-commons</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
@@ -180,11 +182,6 @@
<createDependencyReducedPom>
false
</createDependencyReducedPom>
- <artifactSet>
- <excludes>
- <exclude>org.apache.tika:tika-parsers:jar:</exclude>
- </excludes>
- </artifactSet>
<filters>
<filter>
<artifact>*:*</artifact>
@@ -203,21 +200,13 @@
<exclude>CHANGES</exclude>
<exclude>README</exclude>
<exclude>builddef.lst</exclude>
- <!-- clutter not needed in jar -->
- <exclude>resources/grib1/nasa/README*.pdf</exclude>
- <exclude>resources/grib1/**/readme*.txt</exclude>
- <exclude>resources/grib2/**/readme*.txt</exclude>
- <!-- TIKA-763: Workaround to avoid including LGPL classes -->
- <exclude>ucar/nc2/iosp/fysat/Fysat*.class</exclude>
- <exclude>ucar/nc2/dataset/transform/VOceanSG1*class</exclude>
- <exclude>ucar/unidata/geoloc/vertical/OceanSG*.class</exclude>
</excludes>
</filter>
</filters>
<transformers>
<transformer
implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
- <mainClass>org.apache.tika.server.TikaServerCli</mainClass>
+ <mainClass>org.apache.tika.server.core.TikaServerCli</mainClass>
</transformer>
<transformer
implementation="org.apache.maven.plugins.shade.resource.ServicesResourceTransformer"/>
@@ -287,7 +276,7 @@
<configuration>
<archive>
<manifestEntries>
- <Automatic-Module-Name>org.apache.tika.server</Automatic-Module-Name>
+ <Automatic-Module-Name>org.apache.tika.server.core</Automatic-Module-Name>
</manifestEntries>
</archive>
</configuration>
@@ -307,7 +296,7 @@
<excludes>
<exclude>src/main/resources/tikaserver-version.properties</exclude>
<exclude>src/test/resources/*</exclude>
- <exclude>README.md</exclude>
+ <exclude>src/test/resources/test-documents/**</exclude>
</excludes>
</configuration>
</plugin>
@@ -347,7 +336,7 @@
<goal>java</goal>
</goals>
<configuration>
- <mainClass>org.apache.tika.server.TikaServerCli</mainClass>
+ <mainClass>org.apache.tika.server.core.TikaServerCli</mainClass>
</configuration>
</execution>
</executions>
@@ -387,16 +376,4 @@
</profile>
</profiles>
- <organization>
- <name>The Apache Software Foundation</name>
- <url>http://www.apache.org</url>
- </organization>
- <issueManagement>
- <system>JIRA</system>
- <url>https://issues.apache.org/jira/browse/TIKA</url>
- </issueManagement>
- <ciManagement>
- <system>Jenkins</system>
- <url>https://builds.apache.org/job/Tika-trunk/</url>
- </ciManagement>
-</project>
+</project>
\ No newline at end of file
diff --git a/tika-server/src/main/java/org/apache/tika/server/MetadataList.java b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/CompositeParseContextConfig.java
similarity index 53%
copy from tika-server/src/main/java/org/apache/tika/server/MetadataList.java
copy to tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/CompositeParseContextConfig.java
index 599e20f..0bc0e46 100644
--- a/tika-server/src/main/java/org/apache/tika/server/MetadataList.java
+++ b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/CompositeParseContextConfig.java
@@ -1,5 +1,3 @@
-package org.apache.tika.server;
-
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@@ -16,22 +14,30 @@ package org.apache.tika.server;
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+package org.apache.tika.server.core;
+import org.apache.tika.config.ServiceLoader;
import org.apache.tika.metadata.Metadata;
+import org.apache.tika.parser.ParseContext;
+import javax.ws.rs.core.MultivaluedMap;
import java.util.List;
-/**
- * wrapper class to make isWriteable in MetadataListMBW simpler
- */
-public class MetadataList {
- private final List<Metadata> metadata;
+public class CompositeParseContextConfig implements ParseContextConfig {
+
+
+ final List<ParseContextConfig> configs;
- public MetadataList(List<Metadata> metadata) {
- this.metadata = metadata;
+ public CompositeParseContextConfig() {
+ configs = new ServiceLoader(CompositeParseContextConfig.class.getClassLoader())
+ .loadServiceProviders(ParseContextConfig.class);
}
- public List<Metadata> getMetadata() {
- return metadata;
+ @Override
+ public void configure(MultivaluedMap<String, String> httpHeaders,
+ Metadata metadata, ParseContext context) {
+ for (ParseContextConfig config : configs) {
+ config.configure(httpHeaders, metadata, context);
+ }
}
}
diff --git a/tika-server/src/main/java/org/apache/tika/server/DefaultInputStreamFactory.java b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/DefaultInputStreamFactory.java
similarity index 97%
rename from tika-server/src/main/java/org/apache/tika/server/DefaultInputStreamFactory.java
rename to tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/DefaultInputStreamFactory.java
index f1d6aa6..c09b139 100644
--- a/tika-server/src/main/java/org/apache/tika/server/DefaultInputStreamFactory.java
+++ b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/DefaultInputStreamFactory.java
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package org.apache.tika.server;
+package org.apache.tika.server.core;
import org.apache.tika.metadata.Metadata;
diff --git a/tika-server/src/main/java/org/apache/tika/server/HTMLHelper.java b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/HTMLHelper.java
similarity index 98%
rename from tika-server/src/main/java/org/apache/tika/server/HTMLHelper.java
rename to tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/HTMLHelper.java
index a7e8907..f4af353 100644
--- a/tika-server/src/main/java/org/apache/tika/server/HTMLHelper.java
+++ b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/HTMLHelper.java
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package org.apache.tika.server;
+package org.apache.tika.server.core;
import org.apache.commons.io.IOUtils;
diff --git a/tika-server/src/main/java/org/apache/tika/server/InputStreamFactory.java b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/InputStreamFactory.java
similarity index 97%
rename from tika-server/src/main/java/org/apache/tika/server/InputStreamFactory.java
rename to tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/InputStreamFactory.java
index 3bd4170..4d293c6 100644
--- a/tika-server/src/main/java/org/apache/tika/server/InputStreamFactory.java
+++ b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/InputStreamFactory.java
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package org.apache.tika.server;
+package org.apache.tika.server.core;
import org.apache.tika.metadata.Metadata;
diff --git a/tika-server/src/main/java/org/apache/tika/server/MetadataList.java b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/MetadataList.java
similarity index 96%
rename from tika-server/src/main/java/org/apache/tika/server/MetadataList.java
rename to tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/MetadataList.java
index 599e20f..1a2ce91 100644
--- a/tika-server/src/main/java/org/apache/tika/server/MetadataList.java
+++ b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/MetadataList.java
@@ -1,4 +1,4 @@
-package org.apache.tika.server;
+package org.apache.tika.server.core;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
diff --git a/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/ParseContextConfig.java b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/ParseContextConfig.java
new file mode 100644
index 0000000..81b4155
--- /dev/null
+++ b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/ParseContextConfig.java
@@ -0,0 +1,50 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.server.core;
+
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.parser.ParseContext;
+
+import javax.ws.rs.core.MultivaluedMap;
+
+/**
+ * Implementations must be thread-safe!
+ *
+ * This interface translates HTTP headers into objects/configurations that
+ * are set on the ParseContext.
+ */
+public interface ParseContextConfig {
+
+ void configure(MultivaluedMap<String, String> headers,
+ Metadata metadata, ParseContext context);
+}
diff --git a/tika-server/src/main/java/org/apache/tika/server/ServerStatus.java b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/ServerStatus.java
similarity index 99%
rename from tika-server/src/main/java/org/apache/tika/server/ServerStatus.java
rename to tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/ServerStatus.java
index b49ecbc..a15f7a5 100644
--- a/tika-server/src/main/java/org/apache/tika/server/ServerStatus.java
+++ b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/ServerStatus.java
@@ -14,7 +14,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.tika.server;
+package org.apache.tika.server.core;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
diff --git a/tika-server/src/main/java/org/apache/tika/server/ServerStatusWatcher.java b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/ServerStatusWatcher.java
similarity index 99%
rename from tika-server/src/main/java/org/apache/tika/server/ServerStatusWatcher.java
rename to tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/ServerStatusWatcher.java
index b56ffc6..cec3024 100644
--- a/tika-server/src/main/java/org/apache/tika/server/ServerStatusWatcher.java
+++ b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/ServerStatusWatcher.java
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package org.apache.tika.server;
+package org.apache.tika.server.core;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
diff --git a/tika-server/src/main/java/org/apache/tika/server/ServerTimeouts.java b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/ServerTimeouts.java
similarity index 99%
rename from tika-server/src/main/java/org/apache/tika/server/ServerTimeouts.java
rename to tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/ServerTimeouts.java
index 17959bf..94bf704 100644
--- a/tika-server/src/main/java/org/apache/tika/server/ServerTimeouts.java
+++ b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/ServerTimeouts.java
@@ -14,7 +14,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.tika.server;
+package org.apache.tika.server.core;
public class ServerTimeouts {
diff --git a/tika-server/src/main/java/org/apache/tika/server/TaskStatus.java b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/TaskStatus.java
similarity index 90%
rename from tika-server/src/main/java/org/apache/tika/server/TaskStatus.java
rename to tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/TaskStatus.java
index 1637d7d..07d1373 100644
--- a/tika-server/src/main/java/org/apache/tika/server/TaskStatus.java
+++ b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/TaskStatus.java
@@ -14,12 +14,10 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.tika.server;
+package org.apache.tika.server.core;
import java.time.Instant;
import java.util.Optional;
-import java.util.concurrent.ConcurrentHashMap;
-import java.util.concurrent.atomic.AtomicInteger;
public class TaskStatus {
final ServerStatus.TASK task;
diff --git a/tika-server/src/main/java/org/apache/tika/server/TikaLoggingFilter.java b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/TikaLoggingFilter.java
similarity index 97%
rename from tika-server/src/main/java/org/apache/tika/server/TikaLoggingFilter.java
rename to tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/TikaLoggingFilter.java
index 266818e..3532eda 100644
--- a/tika-server/src/main/java/org/apache/tika/server/TikaLoggingFilter.java
+++ b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/TikaLoggingFilter.java
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package org.apache.tika.server;
+package org.apache.tika.server.core;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
diff --git a/tika-server/src/main/java/org/apache/tika/server/TikaServerCli.java b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/TikaServerCli.java
similarity index 89%
rename from tika-server/src/main/java/org/apache/tika/server/TikaServerCli.java
rename to tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/TikaServerCli.java
index 7995ae7..805bf13 100644
--- a/tika-server/src/main/java/org/apache/tika/server/TikaServerCli.java
+++ b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/TikaServerCli.java
@@ -15,13 +15,14 @@
* limitations under the License.
*/
-package org.apache.tika.server;
+package org.apache.tika.server.core;
import java.io.ByteArrayInputStream;
import java.io.InputStream;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Arrays;
+import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
@@ -42,31 +43,32 @@ import org.apache.cxf.rs.security.cors.CrossOriginResourceSharingFilter;
import org.apache.cxf.transport.common.gzip.GZIPInInterceptor;
import org.apache.cxf.transport.common.gzip.GZIPOutInterceptor;
import org.apache.tika.Tika;
+import org.apache.tika.config.ServiceLoader;
import org.apache.tika.config.TikaConfig;
import org.apache.tika.parser.DigestingParser;
import org.apache.tika.parser.digestutils.BouncyCastleDigester;
import org.apache.tika.parser.digestutils.CommonsDigester;
-import org.apache.tika.server.resource.DetectorResource;
-import org.apache.tika.server.resource.LanguageResource;
-import org.apache.tika.server.resource.MetadataResource;
-import org.apache.tika.server.resource.RecursiveMetadataResource;
-import org.apache.tika.server.resource.TikaDetectors;
-import org.apache.tika.server.resource.TikaMimeTypes;
-import org.apache.tika.server.resource.TikaParsers;
-import org.apache.tika.server.resource.TikaResource;
-import org.apache.tika.server.resource.TikaServerStatus;
-import org.apache.tika.server.resource.TikaVersion;
-import org.apache.tika.server.resource.TikaWelcome;
-import org.apache.tika.server.resource.TranslateResource;
-import org.apache.tika.server.resource.UnpackerResource;
-import org.apache.tika.server.writer.CSVMessageBodyWriter;
-import org.apache.tika.server.writer.JSONMessageBodyWriter;
-import org.apache.tika.server.writer.JSONObjWriter;
-import org.apache.tika.server.writer.MetadataListMessageBodyWriter;
-import org.apache.tika.server.writer.TarWriter;
-import org.apache.tika.server.writer.TextMessageBodyWriter;
-import org.apache.tika.server.writer.XMPMessageBodyWriter;
-import org.apache.tika.server.writer.ZipWriter;
+import org.apache.tika.server.core.resource.DetectorResource;
+import org.apache.tika.server.core.resource.LanguageResource;
+import org.apache.tika.server.core.resource.MetadataResource;
+import org.apache.tika.server.core.resource.RecursiveMetadataResource;
+import org.apache.tika.server.core.resource.TikaDetectors;
+import org.apache.tika.server.core.resource.TikaMimeTypes;
+import org.apache.tika.server.core.resource.TikaParsers;
+import org.apache.tika.server.core.resource.TikaResource;
+import org.apache.tika.server.core.resource.TikaServerResource;
+import org.apache.tika.server.core.resource.TikaServerStatus;
+import org.apache.tika.server.core.resource.TikaVersion;
+import org.apache.tika.server.core.resource.TikaWelcome;
+import org.apache.tika.server.core.resource.TranslateResource;
+import org.apache.tika.server.core.resource.UnpackerResource;
+import org.apache.tika.server.core.writer.CSVMessageBodyWriter;
+import org.apache.tika.server.core.writer.JSONMessageBodyWriter;
+import org.apache.tika.server.core.writer.JSONObjWriter;
+import org.apache.tika.server.core.writer.MetadataListMessageBodyWriter;
+import org.apache.tika.server.core.writer.TarWriter;
+import org.apache.tika.server.core.writer.TextMessageBodyWriter;
+import org.apache.tika.server.core.writer.ZipWriter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -314,6 +316,7 @@ public class TikaServerCli {
rCoreProviders.add(new SingletonResourceProvider(new TikaDetectors()));
rCoreProviders.add(new SingletonResourceProvider(new TikaParsers()));
rCoreProviders.add(new SingletonResourceProvider(new TikaVersion()));
+ rCoreProviders.addAll(loadResourceServices());
if (line.hasOption("status")) {
rCoreProviders.add(new SingletonResourceProvider(new TikaServerStatus(serverStatus)));
}
@@ -327,8 +330,8 @@ public class TikaServerCli {
providers.add(new CSVMessageBodyWriter());
providers.add(new MetadataListMessageBodyWriter());
providers.add(new JSONMessageBodyWriter());
- providers.add(new XMPMessageBodyWriter());
providers.add(new TextMessageBodyWriter());
+ providers.addAll(loadWriterServices());
providers.add(new TikaServerParseExceptionMapper(returnStackTrace));
if (line.hasOption("status")) {
providers.add(new JSONObjWriter());
@@ -358,6 +361,22 @@ public class TikaServerCli {
LOG.info("Started Apache Tika server at {}", url);
}
+    /** Wraps every TikaServerResource found by Tika's ServiceLoader in a SingletonResourceProvider. */
+    private static Collection<? extends ResourceProvider> loadResourceServices() {
+        ServiceLoader serviceLoader = new ServiceLoader(TikaServerCli.class.getClassLoader());
+        List<TikaServerResource> discovered = serviceLoader.loadServiceProviders(TikaServerResource.class);
+        List<ResourceProvider> wrapped = new ArrayList<>();
+        for (TikaServerResource resource : discovered) {
+            wrapped.add(new SingletonResourceProvider(resource));
+        }
+        return wrapped;
+    }
+
+    private static Collection<?> loadWriterServices() { // TikaServerWriter providers found by Tika's ServiceLoader
+        ClassLoader classLoader = TikaServerCli.class.getClassLoader();
+        return new ServiceLoader(classLoader).loadServiceProviders(org.apache.tika.server.core.writer.TikaServerWriter.class);
+    }
+
private static void usage(Options options) {
HelpFormatter helpFormatter = new HelpFormatter();
helpFormatter.printHelp("tikaserver", options);
diff --git a/tika-server/src/main/java/org/apache/tika/server/TikaServerParseException.java b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/TikaServerParseException.java
similarity index 97%
rename from tika-server/src/main/java/org/apache/tika/server/TikaServerParseException.java
rename to tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/TikaServerParseException.java
index f6d8f7c..898a623 100644
--- a/tika-server/src/main/java/org/apache/tika/server/TikaServerParseException.java
+++ b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/TikaServerParseException.java
@@ -1,4 +1,4 @@
-package org.apache.tika.server;
+package org.apache.tika.server.core;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
diff --git a/tika-server/src/main/java/org/apache/tika/server/TikaServerParseExceptionMapper.java b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/TikaServerParseExceptionMapper.java
similarity index 95%
rename from tika-server/src/main/java/org/apache/tika/server/TikaServerParseExceptionMapper.java
rename to tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/TikaServerParseExceptionMapper.java
index 21c28b6..4401d2e 100644
--- a/tika-server/src/main/java/org/apache/tika/server/TikaServerParseExceptionMapper.java
+++ b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/TikaServerParseExceptionMapper.java
@@ -1,4 +1,4 @@
-package org.apache.tika.server;
+package org.apache.tika.server.core;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@@ -27,9 +27,10 @@ import java.io.PrintWriter;
import java.io.StringWriter;
import java.io.Writer;
-import org.apache.poi.hwpf.OldWordFileFormatException;
+
import org.apache.tika.exception.EncryptedDocumentException;
import org.apache.tika.exception.TikaException;
+import org.apache.tika.exception.UnsupportedFormatException;
@Provider
public class TikaServerParseExceptionMapper implements ExceptionMapper<TikaServerParseException> {
@@ -60,7 +61,7 @@ public class TikaServerParseExceptionMapper implements ExceptionMapper<TikaServe
return buildResponse(cause, 422);
} else if (cause instanceof IllegalStateException) {
return buildResponse(cause, 422);
- } else if (cause instanceof OldWordFileFormatException) {
+ } else if (cause instanceof UnsupportedFormatException) {
return buildResponse(cause, 422);
} else if (cause instanceof WebApplicationException) {
return ((WebApplicationException) e.getCause()).getResponse();
diff --git a/tika-server/src/main/java/org/apache/tika/server/TikaServerWatchDog.java b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/TikaServerWatchDog.java
similarity index 99%
rename from tika-server/src/main/java/org/apache/tika/server/TikaServerWatchDog.java
rename to tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/TikaServerWatchDog.java
index 605b262..28973a7 100644
--- a/tika-server/src/main/java/org/apache/tika/server/TikaServerWatchDog.java
+++ b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/TikaServerWatchDog.java
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package org.apache.tika.server;
+package org.apache.tika.server.core;
import org.apache.tika.utils.ProcessUtils;
import org.slf4j.Logger;
@@ -402,7 +402,7 @@ public class TikaServerWatchDog {
jvmArgs.add(cp);
}
argList.addAll(jvmArgs);
- argList.add("org.apache.tika.server.TikaServerCli");
+ argList.add("org.apache.tika.server.core.TikaServerCli");
argList.addAll(childArgs);
argList.add("-child");
argList.add("--numRestarts");
diff --git a/tika-server/src/main/java/org/apache/tika/server/URLEnabledInputStreamFactory.java b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/URLEnabledInputStreamFactory.java
similarity index 98%
rename from tika-server/src/main/java/org/apache/tika/server/URLEnabledInputStreamFactory.java
rename to tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/URLEnabledInputStreamFactory.java
index 775f27a..bdb71c6 100644
--- a/tika-server/src/main/java/org/apache/tika/server/URLEnabledInputStreamFactory.java
+++ b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/URLEnabledInputStreamFactory.java
@@ -14,7 +14,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.tika.server;
+package org.apache.tika.server.core;
import javax.ws.rs.core.HttpHeaders;
import java.io.IOException;
diff --git a/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/config/DocumentSelectorConfig.java b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/config/DocumentSelectorConfig.java
new file mode 100644
index 0000000..e9106af
--- /dev/null
+++ b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/config/DocumentSelectorConfig.java
@@ -0,0 +1,48 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.server.core.config;
+
+import org.apache.commons.lang3.StringUtils;
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.extractor.DocumentSelector;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.server.core.ParseContextConfig;
+
+import javax.ws.rs.core.MultivaluedMap;
+
+/** Maps the X-Tika-Skip-Embedded request header onto a {@link DocumentSelector} in the ParseContext. */
+public class DocumentSelectorConfig implements ParseContextConfig {
+
+    public static final String X_TIKA_SKIP_EMBEDDED_HEADER = "X-Tika-Skip-Embedded";
+
+    /** Sets an always-false selector if any header whose name ends with the constant parses as true. */
+    @Override
+    public void configure(MultivaluedMap<String, String> httpHeaders,
+                          Metadata metadata, ParseContext context) {
+        boolean skipEmbedded = false;
+        for (String key : httpHeaders.keySet()) {
+            if (StringUtils.endsWithIgnoreCase(key, X_TIKA_SKIP_EMBEDDED_HEADER)) {
+                skipEmbedded |= Boolean.parseBoolean(httpHeaders.getFirst(key));
+            }
+        }
+        if (skipEmbedded) {
+            // "ignored", not "metadata": a lambda parameter may not reuse an enclosing parameter's name.
+            context.set(DocumentSelector.class, ignored -> false);
+        }
+    }
+}
diff --git a/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/config/PasswordProviderConfig.java b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/config/PasswordProviderConfig.java
new file mode 100644
index 0000000..8ddf2fa
--- /dev/null
+++ b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/config/PasswordProviderConfig.java
@@ -0,0 +1,61 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.server.core.config;
+
+import org.apache.commons.codec.binary.Base64;
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.parser.PasswordProvider;
+import org.apache.tika.server.core.ParseContextConfig;
+
+import javax.ws.rs.core.MultivaluedMap;
+
+import static java.nio.charset.StandardCharsets.UTF_8;
+
+/** Reads a password from the request headers and exposes it through the ParseContext. */
+public class PasswordProviderConfig implements ParseContextConfig {
+    private static final Base64 BASE_64 = new Base64();
+
+    public static final String PASSWORD = "Password";
+    public static final String PASSWORD_BASE64_UTF8 = "Password_Base64_UTF-8";
+
+    /**
+     * Registers a {@link PasswordProvider} on {@code context} when a password header is present.
+     * The Base64 header is consulted first; the plain {@code Password} header is only a fallback.
+     */
+    @Override
+    public void configure(MultivaluedMap<String, String> httpHeaders,
+                          Metadata metadata, ParseContext context) {
+        String encoded = httpHeaders.getFirst(PASSWORD_BASE64_UTF8);
+        final String password = (encoded != null)
+                ? decodeBase64UTF8(encoded) : httpHeaders.getFirst(PASSWORD);
+        if (password == null) {
+            return;
+        }
+        context.set(PasswordProvider.class, new PasswordProvider() {
+            @Override
+            public String getPassword(Metadata ignored) {
+                return password;
+            }
+        });
+    }
+
+    private static String decodeBase64UTF8(String base64Text) {
+        return new String(BASE_64.decode(base64Text), UTF_8);
+    }
+}
diff --git a/tika-server/src/main/java/org/apache/tika/server/resource/DetectorResource.java b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/DetectorResource.java
similarity index 96%
rename from tika-server/src/main/java/org/apache/tika/server/resource/DetectorResource.java
rename to tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/DetectorResource.java
index 09d2c30..586ed3e 100644
--- a/tika-server/src/main/java/org/apache/tika/server/resource/DetectorResource.java
+++ b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/DetectorResource.java
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package org.apache.tika.server.resource;
+package org.apache.tika.server.core.resource;
import javax.ws.rs.Consumes;
import javax.ws.rs.PUT;
@@ -31,7 +31,7 @@ import org.apache.tika.io.TikaInputStream;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.mime.MediaType;
-import org.apache.tika.server.ServerStatus;
+import org.apache.tika.server.core.ServerStatus;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
diff --git a/tika-server/src/main/java/org/apache/tika/server/resource/LanguageResource.java b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/LanguageResource.java
similarity index 97%
rename from tika-server/src/main/java/org/apache/tika/server/resource/LanguageResource.java
rename to tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/LanguageResource.java
index b653d1f..930ad7f 100644
--- a/tika-server/src/main/java/org/apache/tika/server/resource/LanguageResource.java
+++ b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/LanguageResource.java
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package org.apache.tika.server.resource;
+package org.apache.tika.server.core.resource;
import static java.nio.charset.StandardCharsets.UTF_8;
diff --git a/tika-server/src/main/java/org/apache/tika/server/resource/MetadataResource.java b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/MetadataResource.java
similarity index 86%
rename from tika-server/src/main/java/org/apache/tika/server/resource/MetadataResource.java
rename to tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/MetadataResource.java
index 95199a3..8668b16 100644
--- a/tika-server/src/main/java/org/apache/tika/server/resource/MetadataResource.java
+++ b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/MetadataResource.java
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package org.apache.tika.server.resource;
+package org.apache.tika.server.core.resource;
import javax.ws.rs.Consumes;
import javax.ws.rs.POST;
@@ -32,6 +32,7 @@ import java.io.IOException;
import java.io.InputStream;
import org.apache.cxf.jaxrs.ext.multipart.Attachment;
+import org.apache.tika.extractor.DocumentSelector;
import org.apache.tika.language.detect.LanguageHandler;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.parser.ParseContext;
@@ -39,6 +40,9 @@ import org.apache.tika.parser.Parser;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+import static org.apache.tika.server.core.resource.TikaResource.fillMetadata;
+import static org.apache.tika.server.core.resource.TikaResource.fillParseContext;
+
@Path("/meta")
public class MetadataResource {
@@ -46,7 +50,7 @@ public class MetadataResource {
@POST
@Consumes("multipart/form-data")
- @Produces({"text/csv", "application/json", "application/rdf+xml"})
+ @Produces({"text/csv", "application/json"})
@Path("form")
public Response getMetadataFromMultipart(Attachment att, @Context UriInfo info) throws Exception {
return Response.ok(
@@ -55,7 +59,7 @@ public class MetadataResource {
}
@PUT
- @Produces({"text/csv", "application/json", "application/rdf+xml"})
+ @Produces({"text/csv", "application/json"})
public Response getMetadata(InputStream is, @Context HttpHeaders httpHeaders, @Context UriInfo info) throws Exception {
Metadata metadata = new Metadata();
return Response.ok(
@@ -86,7 +90,7 @@ public class MetadataResource {
*/
@PUT
@Path("{field}")
- @Produces({"text/csv", "application/json", "application/rdf+xml", "text/plain"})
+ @Produces({"text/csv", "application/json", "text/plain"})
public Response getMetadataField(InputStream is, @Context HttpHeaders httpHeaders,
@Context UriInfo info, @PathParam("field") String field) throws Exception {
@@ -119,13 +123,15 @@ public class MetadataResource {
return Response.ok(metadata).build();
}
- private Metadata parseMetadata(InputStream is, Metadata metadata,
- MultivaluedMap<String, String> httpHeaders, UriInfo info) throws IOException {
+ protected Metadata parseMetadata(InputStream is, Metadata metadata,
+ MultivaluedMap<String, String> httpHeaders, UriInfo info) throws IOException {
final ParseContext context = new ParseContext();
Parser parser = TikaResource.createParser();
- TikaResource.fillMetadata(parser, metadata, context, httpHeaders);
- //no need to pass parser for embedded document parsing
- TikaResource.fillParseContext(context, httpHeaders, null);
+ fillMetadata(parser, metadata, httpHeaders);
+ fillParseContext(httpHeaders, metadata, context);
+ //no need to parse embedded docs
+ context.set(DocumentSelector.class, metadata1 -> false);
+
TikaResource.logRequest(LOG, info, metadata);
TikaResource.parse(parser, LOG, info.getPath(), is,
new LanguageHandler() {
diff --git a/tika-server/src/main/java/org/apache/tika/server/resource/RecursiveMetadataResource.java b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/RecursiveMetadataResource.java
similarity index 94%
rename from tika-server/src/main/java/org/apache/tika/server/resource/RecursiveMetadataResource.java
rename to tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/RecursiveMetadataResource.java
index 71e7180..e0bc1ca 100644
--- a/tika-server/src/main/java/org/apache/tika/server/resource/RecursiveMetadataResource.java
+++ b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/RecursiveMetadataResource.java
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package org.apache.tika.server.resource;
+package org.apache.tika.server.core.resource;
import javax.ws.rs.Consumes;
import javax.ws.rs.POST;
@@ -31,17 +31,19 @@ import javax.ws.rs.core.UriInfo;
import java.io.InputStream;
import org.apache.cxf.jaxrs.ext.multipart.Attachment;
-import org.apache.tika.language.detect.LanguageHandler;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
import org.apache.tika.parser.RecursiveParserWrapper;
import org.apache.tika.sax.BasicContentHandlerFactory;
import org.apache.tika.sax.RecursiveParserWrapperHandler;
-import org.apache.tika.server.MetadataList;
+import org.apache.tika.server.core.MetadataList;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+import static org.apache.tika.server.core.resource.TikaResource.fillMetadata;
+import static org.apache.tika.server.core.resource.TikaResource.fillParseContext;
+
@Path("/rmeta")
public class RecursiveMetadataResource {
@@ -134,9 +136,8 @@ public class RecursiveMetadataResource {
RecursiveParserWrapper wrapper = new RecursiveParserWrapper(parser);
- TikaResource.fillMetadata(parser, metadata, context, httpHeaders);
- // no need to add parser to parse recursively
- TikaResource.fillParseContext(context, httpHeaders, null);
+ fillMetadata(parser, metadata, httpHeaders);
+ fillParseContext(httpHeaders, metadata, context);
TikaResource.logRequest(LOG, info, metadata);
int writeLimit = -1;
diff --git a/tika-server/src/main/java/org/apache/tika/server/resource/TikaDetectors.java b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/TikaDetectors.java
similarity index 97%
rename from tika-server/src/main/java/org/apache/tika/server/resource/TikaDetectors.java
rename to tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/TikaDetectors.java
index e0cd6ba..ce163d5 100644
--- a/tika-server/src/main/java/org/apache/tika/server/resource/TikaDetectors.java
+++ b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/TikaDetectors.java
@@ -14,7 +14,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.tika.server.resource;
+package org.apache.tika.server.core.resource;
import javax.ws.rs.GET;
import javax.ws.rs.Path;
@@ -28,7 +28,7 @@ import com.google.gson.Gson;
import com.google.gson.GsonBuilder;
import org.apache.tika.detect.CompositeDetector;
import org.apache.tika.detect.Detector;
-import org.apache.tika.server.HTMLHelper;
+import org.apache.tika.server.core.HTMLHelper;
/**
* <p>Provides details of all the {@link Detector}s registered with
diff --git a/tika-server/src/main/java/org/apache/tika/server/resource/TikaMimeTypes.java b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/TikaMimeTypes.java
similarity index 98%
rename from tika-server/src/main/java/org/apache/tika/server/resource/TikaMimeTypes.java
rename to tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/TikaMimeTypes.java
index bc8c8ca..be1d84b 100644
--- a/tika-server/src/main/java/org/apache/tika/server/resource/TikaMimeTypes.java
+++ b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/TikaMimeTypes.java
@@ -14,7 +14,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.tika.server.resource;
+package org.apache.tika.server.core.resource;
import javax.ws.rs.GET;
import javax.ws.rs.Path;
@@ -32,7 +32,7 @@ import org.apache.tika.mime.MediaType;
import org.apache.tika.mime.MediaTypeRegistry;
import org.apache.tika.parser.CompositeParser;
import org.apache.tika.parser.Parser;
-import org.apache.tika.server.HTMLHelper;
+import org.apache.tika.server.core.HTMLHelper;
/**
* <p>Provides details of all the mimetypes known to Apache Tika,
diff --git a/tika-server/src/main/java/org/apache/tika/server/resource/TikaParsers.java b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/TikaParsers.java
similarity index 98%
rename from tika-server/src/main/java/org/apache/tika/server/resource/TikaParsers.java
rename to tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/TikaParsers.java
index 1c5dfae..f1c4abb 100644
--- a/tika-server/src/main/java/org/apache/tika/server/resource/TikaParsers.java
+++ b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/TikaParsers.java
@@ -14,7 +14,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.tika.server.resource;
+package org.apache.tika.server.core.resource;
import javax.ws.rs.GET;
import javax.ws.rs.Path;
@@ -35,7 +35,7 @@ import org.apache.tika.parser.CompositeParser;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
import org.apache.tika.parser.ParserDecorator;
-import org.apache.tika.server.HTMLHelper;
+import org.apache.tika.server.core.HTMLHelper;
/**
* <p>Provides details of all the {@link Parser}s registered with
diff --git a/tika-server/src/main/java/org/apache/tika/server/resource/TikaResource.java b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/TikaResource.java
similarity index 83%
rename from tika-server/src/main/java/org/apache/tika/server/resource/TikaResource.java
rename to tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/TikaResource.java
index f48a89d..627a12a 100644
--- a/tika-server/src/main/java/org/apache/tika/server/resource/TikaResource.java
+++ b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/TikaResource.java
@@ -15,18 +15,16 @@
* limitations under the License.
*/
-package org.apache.tika.server.resource;
+package org.apache.tika.server.core.resource;
import org.apache.commons.codec.binary.Base64;
import org.apache.commons.lang3.StringUtils;
import org.apache.cxf.attachment.ContentDisposition;
import org.apache.cxf.jaxrs.ext.multipart.Attachment;
-import org.apache.poi.ooxml.extractor.ExtractorFactory;
import org.apache.tika.Tika;
import org.apache.tika.config.TikaConfig;
import org.apache.tika.detect.Detector;
import org.apache.tika.exception.EncryptedDocumentException;
-import org.apache.tika.extractor.DocumentSelector;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.mime.MediaType;
@@ -35,16 +33,15 @@ import org.apache.tika.parser.DigestingParser;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
import org.apache.tika.parser.ParserDecorator;
-import org.apache.tika.parser.PasswordProvider;
-import org.apache.tika.parser.html.BoilerpipeContentHandler;
-import org.apache.tika.parser.ocr.TesseractOCRConfig;
-import org.apache.tika.parser.pdf.PDFParserConfig;
import org.apache.tika.sax.BodyContentHandler;
import org.apache.tika.sax.ExpandedTitleContentHandler;
import org.apache.tika.sax.RichTextContentHandler;
-import org.apache.tika.server.InputStreamFactory;
-import org.apache.tika.server.ServerStatus;
-import org.apache.tika.server.TikaServerParseException;
+import org.apache.tika.sax.boilerpipe.BoilerpipeContentHandler;
+import org.apache.tika.server.core.CompositeParseContextConfig;
+import org.apache.tika.server.core.InputStreamFactory;
+import org.apache.tika.server.core.ParseContextConfig;
+import org.apache.tika.server.core.ServerStatus;
+import org.apache.tika.server.core.TikaServerParseException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.xml.sax.ContentHandler;
@@ -87,19 +84,18 @@ public class TikaResource {
private static Pattern ALLOWABLE_HEADER_CHARS = Pattern.compile("(?i)^[-/_+\\.A-Z0-9 ]+$");
public static final String GREETING = "This is Tika Server (" + new Tika().toString() + "). Please PUT\n";
- public static final String X_TIKA_OCR_HEADER_PREFIX = "X-Tika-OCR";
- public static final String X_TIKA_PDF_HEADER_PREFIX = "X-Tika-PDF";
- public static final String X_TIKA_SKIP_EMBEDDED_HEADER = "X-Tika-Skip-Embedded";
- public static final String PASSWORD = "Password";
- public static final String PASSWORD_BASE64_UTF8 = "Password_Base64_UTF-8";
+
private static final Logger LOG = LoggerFactory.getLogger(TikaResource.class);
- private static final Base64 BASE_64 = new Base64();
private static TikaConfig tikaConfig;
private static DigestingParser.Digester digester = null;
private static InputStreamFactory inputStreamFactory = null;
private static ServerStatus SERVER_STATUS = null;
+
+ private static ParseContextConfig PARSE_CONTEXT_CONFIG = new CompositeParseContextConfig();
+
+
public static void init(TikaConfig config, DigestingParser.Digester digestr,
InputStreamFactory iSF, ServerStatus serverStatus) {
tikaConfig = config;
@@ -108,9 +104,7 @@ public class TikaResource {
SERVER_STATUS = serverStatus;
}
- static {
- ExtractorFactory.setAllThreadsPreferEventExtractors(true);
- }
+
@SuppressWarnings("serial")
public static Parser createParser() {
@@ -145,40 +139,10 @@ public class TikaResource {
return httpHeaders.getFirst("File-Name");
}
- public static void fillParseContext(ParseContext parseContext, MultivaluedMap<String, String> httpHeaders,
- Parser embeddedParser) {
- //lazily initialize configs
- //if a header is submitted, any params set in --tika-config tika-config.xml
- //upon server startup will be ignored.
- TesseractOCRConfig ocrConfig = null;
- PDFParserConfig pdfParserConfig = null;
- DocumentSelector documentSelector = null;
- for (String key : httpHeaders.keySet()) {
- if (StringUtils.startsWith(key, X_TIKA_OCR_HEADER_PREFIX)) {
- ocrConfig = (ocrConfig == null) ? new TesseractOCRConfig() : ocrConfig;
- processHeaderConfig(httpHeaders, ocrConfig, key, X_TIKA_OCR_HEADER_PREFIX);
- } else if (StringUtils.startsWith(key, X_TIKA_PDF_HEADER_PREFIX)) {
- pdfParserConfig = (pdfParserConfig == null) ? new PDFParserConfig() : pdfParserConfig;
- processHeaderConfig(httpHeaders, pdfParserConfig, key, X_TIKA_PDF_HEADER_PREFIX);
- } else if (StringUtils.endsWithIgnoreCase(key, X_TIKA_SKIP_EMBEDDED_HEADER)) {
- String skipEmbedded = httpHeaders.getFirst(key);
- if (Boolean.parseBoolean(skipEmbedded)) {
- documentSelector = metadata -> false;
- }
- }
- }
- if (ocrConfig != null) {
- parseContext.set(TesseractOCRConfig.class, ocrConfig);
- }
- if (pdfParserConfig != null) {
- parseContext.set(PDFParserConfig.class, pdfParserConfig);
- }
- if (embeddedParser != null) {
- parseContext.set(Parser.class, embeddedParser);
- }
- if (documentSelector != null) {
- parseContext.set(DocumentSelector.class, documentSelector);
- }
+    /** Hands the request headers, metadata and ParseContext to the shared PARSE_CONTEXT_CONFIG. */
+    public static void fillParseContext(MultivaluedMap<String, String> httpHeaders,
+                                        Metadata metadata, ParseContext parseContext) {
+        PARSE_CONTEXT_CONFIG.configure(httpHeaders, metadata, parseContext);
     }
public static InputStream getInputStream(InputStream is, Metadata metadata, HttpHeaders headers) {
@@ -198,7 +162,7 @@ public class TikaResource {
* @param prefix the name of the HTTP Header prefix used to find property.
* @throws WebApplicationException thrown when field cannot be found.
*/
- private static void processHeaderConfig(MultivaluedMap<String, String> httpHeaders, Object object, String key, String prefix) {
+ public static void processHeaderConfig(MultivaluedMap<String, String> httpHeaders, Object object, String key, String prefix) {
try {String property = StringUtils.removeStart(key, prefix);
Field field = null;
@@ -305,7 +269,8 @@ public class TikaResource {
}
@SuppressWarnings("serial")
- public static void fillMetadata(Parser parser, Metadata metadata, ParseContext context, MultivaluedMap<String, String> httpHeaders) {
+ public static void fillMetadata(Parser parser, Metadata metadata,
+ MultivaluedMap<String, String> httpHeaders) {
String fileName = detectFilename(httpHeaders);
if (fileName != null) {
metadata.set(TikaCoreProperties.RESOURCE_NAME_KEY, fileName);
@@ -344,27 +309,6 @@ public class TikaResource {
}
});
}
-
- String tmpPassword = httpHeaders.getFirst(PASSWORD_BASE64_UTF8);
- if (tmpPassword != null) {
- tmpPassword = decodeBase64UTF8(tmpPassword);
- } else {
- tmpPassword = httpHeaders.getFirst(PASSWORD);
- }
- if (tmpPassword != null) {
- final String password = tmpPassword;
- context.set(PasswordProvider.class, new PasswordProvider() {
- @Override
- public String getPassword(Metadata metadata) {
- return password;
- }
- });
- }
- }
-
- private static String decodeBase64UTF8(String s) {
- byte[] bytes = BASE_64.decode(s);
- return new String(bytes, UTF_8);
}
public static void setDetector(Parser p, Detector detector) {
@@ -487,8 +431,8 @@ public class TikaResource {
final Metadata metadata = new Metadata();
final ParseContext context = new ParseContext();
- fillMetadata(parser, metadata, context, httpHeaders);
- fillParseContext(context, httpHeaders, parser);
+ fillMetadata(parser, metadata, httpHeaders);
+ fillParseContext(httpHeaders, metadata, context);
logRequest(LOG, info, metadata);
@@ -516,8 +460,8 @@ public class TikaResource {
final Parser parser = createParser();
final ParseContext context = new ParseContext();
- fillMetadata(parser, metadata, context, httpHeaders);
- fillParseContext(context, httpHeaders, parser);
+ fillMetadata(parser, metadata, httpHeaders);
+ fillParseContext(httpHeaders, metadata, context);
logRequest(LOG, info, metadata);
@@ -572,8 +516,8 @@ public class TikaResource {
final Parser parser = createParser();
final ParseContext context = new ParseContext();
- fillMetadata(parser, metadata, context, httpHeaders);
- fillParseContext(context, httpHeaders, parser);
+ fillMetadata(parser, metadata, httpHeaders);
+ fillParseContext(httpHeaders, metadata, context);
logRequest(LOG, info, metadata);
diff --git a/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/TikaServerResource.java b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/TikaServerResource.java
new file mode 100644
index 0000000..e4b97d3
--- /dev/null
+++ b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/TikaServerResource.java
@@ -0,0 +1,4 @@
+package org.apache.tika.server.core.resource;
+
+public interface TikaServerResource {
+}
diff --git a/tika-server/src/main/java/org/apache/tika/server/resource/TikaServerStatus.java b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/TikaServerStatus.java
similarity index 94%
rename from tika-server/src/main/java/org/apache/tika/server/resource/TikaServerStatus.java
rename to tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/TikaServerStatus.java
index fbbf382..5d708c3 100644
--- a/tika-server/src/main/java/org/apache/tika/server/resource/TikaServerStatus.java
+++ b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/TikaServerStatus.java
@@ -14,9 +14,9 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.tika.server.resource;
+package org.apache.tika.server.core.resource;
-import org.apache.tika.server.ServerStatus;
+import org.apache.tika.server.core.ServerStatus;
import javax.ws.rs.GET;
import javax.ws.rs.Path;
diff --git a/tika-server/src/main/java/org/apache/tika/server/resource/TikaVersion.java b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/TikaVersion.java
similarity index 96%
rename from tika-server/src/main/java/org/apache/tika/server/resource/TikaVersion.java
rename to tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/TikaVersion.java
index a892716..664ed7e 100644
--- a/tika-server/src/main/java/org/apache/tika/server/resource/TikaVersion.java
+++ b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/TikaVersion.java
@@ -14,7 +14,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.tika.server.resource;
+package org.apache.tika.server.core.resource;
import javax.ws.rs.GET;
import javax.ws.rs.Path;
diff --git a/tika-server/src/main/java/org/apache/tika/server/resource/TikaWelcome.java b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/TikaWelcome.java
similarity index 98%
rename from tika-server/src/main/java/org/apache/tika/server/resource/TikaWelcome.java
rename to tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/TikaWelcome.java
index 0b4e35f..ad5c65a 100644
--- a/tika-server/src/main/java/org/apache/tika/server/resource/TikaWelcome.java
+++ b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/TikaWelcome.java
@@ -14,7 +14,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.tika.server.resource;
+package org.apache.tika.server.core.resource;
import javax.ws.rs.DELETE;
import javax.ws.rs.GET;
@@ -24,8 +24,6 @@ import javax.ws.rs.POST;
import javax.ws.rs.PUT;
import javax.ws.rs.Path;
import javax.ws.rs.Produces;
-import javax.ws.rs.WebApplicationException;
-import javax.ws.rs.core.Response;
import java.lang.annotation.Annotation;
import java.lang.reflect.Method;
import java.util.ArrayList;
@@ -41,7 +39,7 @@ import java.util.regex.Pattern;
import org.apache.cxf.jaxrs.lifecycle.ResourceProvider;
import org.apache.tika.Tika;
-import org.apache.tika.server.HTMLHelper;
+import org.apache.tika.server.core.HTMLHelper;
/**
* <p>Provides a basic welcome to the Apache Tika Server.</p>
diff --git a/tika-server/src/main/java/org/apache/tika/server/resource/TranslateResource.java b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/TranslateResource.java
similarity index 96%
rename from tika-server/src/main/java/org/apache/tika/server/resource/TranslateResource.java
rename to tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/TranslateResource.java
index 755ab41..6054aa8 100644
--- a/tika-server/src/main/java/org/apache/tika/server/resource/TranslateResource.java
+++ b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/TranslateResource.java
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package org.apache.tika.server.resource;
+package org.apache.tika.server.core.resource;
import static java.nio.charset.StandardCharsets.UTF_8;
@@ -29,8 +29,6 @@ import javax.ws.rs.PUT;
import javax.ws.rs.Path;
import javax.ws.rs.PathParam;
import javax.ws.rs.Produces;
-import javax.ws.rs.WebApplicationException;
-import javax.ws.rs.core.Response;
import org.apache.commons.io.IOUtils;
import org.apache.tika.config.LoadErrorHandler;
@@ -39,7 +37,7 @@ import org.apache.tika.exception.TikaException;
import org.apache.tika.langdetect.optimaize.OptimaizeLangDetector;
import org.apache.tika.language.detect.LanguageResult;
import org.apache.tika.language.translate.Translator;
-import org.apache.tika.server.ServerStatus;
+import org.apache.tika.server.core.ServerStatus;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
diff --git a/tika-server/src/main/java/org/apache/tika/server/resource/UnpackerResource.java b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/UnpackerResource.java
similarity index 77%
rename from tika-server/src/main/java/org/apache/tika/server/resource/UnpackerResource.java
rename to tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/UnpackerResource.java
index c8ad8c8..30860b1 100644
--- a/tika-server/src/main/java/org/apache/tika/server/resource/UnpackerResource.java
+++ b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/UnpackerResource.java
@@ -15,9 +15,11 @@
* limitations under the License.
*/
-package org.apache.tika.server.resource;
+package org.apache.tika.server.core.resource;
import static java.nio.charset.StandardCharsets.UTF_8;
+import static org.apache.tika.server.core.resource.TikaResource.fillMetadata;
+import static org.apache.tika.server.core.resource.TikaResource.fillParseContext;
import javax.ws.rs.PUT;
import javax.ws.rs.Path;
@@ -42,26 +44,19 @@ import java.util.UUID;
import au.com.bytecode.opencsv.CSVWriter;
import org.apache.commons.io.FilenameUtils;
import org.apache.commons.io.IOExceptionWithCause;
+import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.mutable.MutableInt;
-import org.apache.poi.poifs.filesystem.DirectoryEntry;
-import org.apache.poi.poifs.filesystem.DocumentEntry;
-import org.apache.poi.poifs.filesystem.DocumentInputStream;
-import org.apache.poi.poifs.filesystem.Entry;
-import org.apache.poi.poifs.filesystem.Ole10Native;
-import org.apache.poi.poifs.filesystem.Ole10NativeException;
-import org.apache.poi.poifs.filesystem.POIFSFileSystem;
-import org.apache.poi.util.IOUtils;
import org.apache.tika.exception.TikaMemoryLimitException;
+import org.apache.tika.extractor.DefaultEmbeddedStreamTranslator;
import org.apache.tika.extractor.EmbeddedDocumentExtractor;
+import org.apache.tika.extractor.EmbeddedStreamTranslator;
import org.apache.tika.io.BoundedInputStream;
-import org.apache.tika.io.TikaInputStream;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.mime.MimeTypeException;
import org.apache.tika.parser.DigestingParser;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
-import org.apache.tika.parser.microsoft.OfficeParser;
import org.apache.tika.sax.BodyContentHandler;
import org.apache.tika.sax.RichTextContentHandler;
import org.slf4j.Logger;
@@ -129,8 +124,9 @@ public class UnpackerResource {
//no need to digest for unwrapping
parser = ((DigestingParser)parser).getWrappedParser();
}
- TikaResource.fillParseContext(pc, httpHeaders.getRequestHeaders(), null);
- TikaResource.fillMetadata(parser, metadata, pc, httpHeaders.getRequestHeaders());
+ fillMetadata(parser, metadata, httpHeaders.getRequestHeaders());
+ fillParseContext(httpHeaders.getRequestHeaders(), metadata, pc);
+
TikaResource.logRequest(LOG, info, metadata);
//even though we aren't currently parsing embedded documents,
//we need to add this to allow for "inline" use of other parsers.
@@ -169,6 +165,7 @@ public class UnpackerResource {
private class MyEmbeddedDocumentExtractor implements EmbeddedDocumentExtractor {
private final MutableInt count;
private final Map<String, byte[]> zout;
+ private final EmbeddedStreamTranslator embeddedStreamTranslator = new DefaultEmbeddedStreamTranslator();
MyEmbeddedDocumentExtractor(MutableInt count, Map<String, byte[]> zout) {
this.count = count;
@@ -208,30 +205,12 @@ public class UnpackerResource {
LOG.warn("Unexpected MimeTypeException", e);
}
}
-
- if ("application/vnd.openxmlformats-officedocument.oleObject".equals(contentType)) {
- POIFSFileSystem poifs = new POIFSFileSystem(new ByteArrayInputStream(data));
- OfficeParser.POIFSDocumentType type = OfficeParser.POIFSDocumentType.detectType(poifs);
-
- if (type == OfficeParser.POIFSDocumentType.OLE10_NATIVE) {
- try {
- Ole10Native ole = Ole10Native.createFromEmbeddedOleObject(poifs);
- if (ole.getDataSize() > 0) {
- String label = ole.getLabel();
-
- if (label.startsWith("ole-")) {
- label = Integer.toString(count.intValue()) + '-' + label;
- }
-
- name = label;
-
- data = ole.getDataBuffer();
- }
- } catch (Ole10NativeException ex) {
- LOG.warn("Skipping invalid part", ex);
- }
- } else {
- name += '.' + type.getExtension();
+ try (InputStream is = new ByteArrayInputStream(data)) { // probe stream only; translate() below gets its own fresh stream
+ if (embeddedStreamTranslator.shouldTranslate(is, metadata)) {
+ try (InputStream translated = embeddedStreamTranslator.translate(
+ new ByteArrayInputStream(data), metadata)) {
+ data = IOUtils.toByteArray(translated); // drain fully; try-with-resources then closes the translated stream
+ }
+ }
 }
@@ -239,22 +218,7 @@ public class UnpackerResource {
if (data.length > 0) {
zout.put(finalName, data);
-
count.increment();
- } else {
- if (inputStream instanceof TikaInputStream) {
- TikaInputStream tin = (TikaInputStream) inputStream;
-
- if (tin.getOpenContainer() != null && tin.getOpenContainer() instanceof DirectoryEntry) {
- POIFSFileSystem fs = new POIFSFileSystem();
- copy((DirectoryEntry) tin.getOpenContainer(), fs.getRoot());
- ByteArrayOutputStream bos2 = new ByteArrayOutputStream();
- fs.writeFilesystem(bos2);
- bos2.close();
-
- zout.put(finalName, bos2.toByteArray());
- }
- }
}
}
@@ -280,7 +244,7 @@ public class UnpackerResource {
return normalizedName;
}
- protected void copy(DirectoryEntry sourceDir, DirectoryEntry destDir)
+/* protected void copy(DirectoryEntry sourceDir, DirectoryEntry destDir)
throws IOException {
for (Entry entry : sourceDir) {
if (entry instanceof DirectoryEntry) {
@@ -294,6 +258,6 @@ public class UnpackerResource {
}
}
}
- }
+ }*/
}
}
diff --git a/tika-server/src/main/java/org/apache/tika/server/writer/CSVMessageBodyWriter.java b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/writer/CSVMessageBodyWriter.java
similarity index 98%
rename from tika-server/src/main/java/org/apache/tika/server/writer/CSVMessageBodyWriter.java
rename to tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/writer/CSVMessageBodyWriter.java
index fcb14d3a..5e61a31 100644
--- a/tika-server/src/main/java/org/apache/tika/server/writer/CSVMessageBodyWriter.java
+++ b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/writer/CSVMessageBodyWriter.java
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package org.apache.tika.server.writer;
+package org.apache.tika.server.core.writer;
import javax.ws.rs.Produces;
import javax.ws.rs.WebApplicationException;
diff --git a/tika-server/src/main/java/org/apache/tika/server/writer/JSONMessageBodyWriter.java b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/writer/JSONMessageBodyWriter.java
similarity index 98%
rename from tika-server/src/main/java/org/apache/tika/server/writer/JSONMessageBodyWriter.java
rename to tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/writer/JSONMessageBodyWriter.java
index 1cb9600..d3ff0a8 100644
--- a/tika-server/src/main/java/org/apache/tika/server/writer/JSONMessageBodyWriter.java
+++ b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/writer/JSONMessageBodyWriter.java
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package org.apache.tika.server.writer;
+package org.apache.tika.server.core.writer;
import javax.ws.rs.Produces;
import javax.ws.rs.WebApplicationException;
diff --git a/tika-server/src/main/java/org/apache/tika/server/writer/JSONObjWriter.java b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/writer/JSONObjWriter.java
similarity index 94%
rename from tika-server/src/main/java/org/apache/tika/server/writer/JSONObjWriter.java
rename to tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/writer/JSONObjWriter.java
index 08851d6..f71fde3 100644
--- a/tika-server/src/main/java/org/apache/tika/server/writer/JSONObjWriter.java
+++ b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/writer/JSONObjWriter.java
@@ -15,13 +15,11 @@
* limitations under the License.
*/
-package org.apache.tika.server.writer;
+package org.apache.tika.server.core.writer;
import com.google.gson.Gson;
import com.google.gson.GsonBuilder;
-import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
-import org.apache.tika.metadata.serialization.JsonMetadata;
import javax.ws.rs.Produces;
import javax.ws.rs.WebApplicationException;
diff --git a/tika-server/src/main/java/org/apache/tika/server/writer/MetadataListMessageBodyWriter.java b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/writer/MetadataListMessageBodyWriter.java
similarity index 96%
rename from tika-server/src/main/java/org/apache/tika/server/writer/MetadataListMessageBodyWriter.java
rename to tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/writer/MetadataListMessageBodyWriter.java
index 0b6eff4..cedfcba 100644
--- a/tika-server/src/main/java/org/apache/tika/server/writer/MetadataListMessageBodyWriter.java
+++ b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/writer/MetadataListMessageBodyWriter.java
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package org.apache.tika.server.writer;
+package org.apache.tika.server.core.writer;
import javax.ws.rs.Produces;
import javax.ws.rs.WebApplicationException;
@@ -33,7 +33,7 @@ import java.lang.reflect.Type;
import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.serialization.JsonMetadataList;
-import org.apache.tika.server.MetadataList;
+import org.apache.tika.server.core.MetadataList;
import static java.nio.charset.StandardCharsets.UTF_8;
diff --git a/tika-server/src/main/java/org/apache/tika/server/writer/TarWriter.java b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/writer/TarWriter.java
similarity index 98%
rename from tika-server/src/main/java/org/apache/tika/server/writer/TarWriter.java
rename to tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/writer/TarWriter.java
index bb4dec4..b53019a 100644
--- a/tika-server/src/main/java/org/apache/tika/server/writer/TarWriter.java
+++ b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/writer/TarWriter.java
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package org.apache.tika.server.writer;
+package org.apache.tika.server.core.writer;
import javax.ws.rs.Produces;
import javax.ws.rs.WebApplicationException;
diff --git a/tika-server/src/main/java/org/apache/tika/server/writer/TextMessageBodyWriter.java b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/writer/TextMessageBodyWriter.java
similarity index 98%
rename from tika-server/src/main/java/org/apache/tika/server/writer/TextMessageBodyWriter.java
rename to tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/writer/TextMessageBodyWriter.java
index d7b79fa..e55b7cd 100644
--- a/tika-server/src/main/java/org/apache/tika/server/writer/TextMessageBodyWriter.java
+++ b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/writer/TextMessageBodyWriter.java
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package org.apache.tika.server.writer;
+package org.apache.tika.server.core.writer;
import javax.ws.rs.Produces;
import javax.ws.rs.WebApplicationException;
diff --git a/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/writer/TikaServerWriter.java b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/writer/TikaServerWriter.java
new file mode 100644
index 0000000..3fa35d0
--- /dev/null
+++ b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/writer/TikaServerWriter.java
@@ -0,0 +1,10 @@
+package org.apache.tika.server.core.writer;
+
+import javax.ws.rs.ext.MessageBodyWriter;
+
+/**
+ * Marker interface (no members of its own) that allows SPI loading of writers from other
+ * modules without opening up service loading to any generic {@link MessageBodyWriter}.
+ */
+public interface TikaServerWriter<T> extends MessageBodyWriter<T> {
+}
diff --git a/tika-server/src/main/java/org/apache/tika/server/writer/ZipWriter.java b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/writer/ZipWriter.java
similarity index 98%
rename from tika-server/src/main/java/org/apache/tika/server/writer/ZipWriter.java
rename to tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/writer/ZipWriter.java
index 24cae80..2c2d7f4 100644
--- a/tika-server/src/main/java/org/apache/tika/server/writer/ZipWriter.java
+++ b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/writer/ZipWriter.java
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package org.apache.tika.server.writer;
+package org.apache.tika.server.core.writer;
import javax.ws.rs.Produces;
import javax.ws.rs.WebApplicationException;
diff --git a/tika-server/tika-server-core/src/main/resources/META-INF/services/org.apache.tika.server.core.ParseContextConfig b/tika-server/tika-server-core/src/main/resources/META-INF/services/org.apache.tika.server.core.ParseContextConfig
new file mode 100644
index 0000000..7996345
--- /dev/null
+++ b/tika-server/tika-server-core/src/main/resources/META-INF/services/org.apache.tika.server.core.ParseContextConfig
@@ -0,0 +1,16 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+org.apache.tika.server.core.config.PasswordProviderConfig
+org.apache.tika.server.core.config.DocumentSelectorConfig
\ No newline at end of file
diff --git a/tika-server/src/main/resources/log4j.properties b/tika-server/tika-server-core/src/main/resources/log4j.properties
similarity index 100%
rename from tika-server/src/main/resources/log4j.properties
rename to tika-server/tika-server-core/src/main/resources/log4j.properties
diff --git a/tika-server/src/main/resources/tikaserver-template.html b/tika-server/tika-server-core/src/main/resources/tikaserver-template.html
similarity index 100%
rename from tika-server/src/main/resources/tikaserver-template.html
rename to tika-server/tika-server-core/src/main/resources/tikaserver-template.html
diff --git a/tika-server/src/main/resources/tikaserver-version.properties b/tika-server/tika-server-core/src/main/resources/tikaserver-version.properties
similarity index 100%
rename from tika-server/src/main/resources/tikaserver-version.properties
rename to tika-server/tika-server-core/src/main/resources/tikaserver-version.properties
diff --git a/tika-server/src/test/java/org/apache/tika/server/CXFTestBase.java b/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/CXFTestBase.java
similarity index 93%
rename from tika-server/src/test/java/org/apache/tika/server/CXFTestBase.java
rename to tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/CXFTestBase.java
index c9a8203..b503213 100644
--- a/tika-server/src/test/java/org/apache/tika/server/CXFTestBase.java
+++ b/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/CXFTestBase.java
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package org.apache.tika.server;
+package org.apache.tika.server.core;
import static java.nio.charset.StandardCharsets.UTF_8;
import static org.junit.Assert.assertFalse;
@@ -29,11 +29,9 @@ import java.io.OutputStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardCopyOption;
-import java.util.ArrayList;
import java.util.Collections;
import java.util.Enumeration;
import java.util.HashMap;
-import java.util.List;
import java.util.Map;
import org.apache.commons.codec.digest.DigestUtils;
@@ -51,8 +49,11 @@ import org.apache.cxf.transport.common.gzip.GZIPInInterceptor;
import org.apache.cxf.transport.common.gzip.GZIPOutInterceptor;
import org.apache.tika.config.TikaConfig;
import org.apache.tika.parser.digestutils.CommonsDigester;
-import org.apache.tika.server.resource.TikaResource;
-import org.apache.tika.server.resource.UnpackerResource;
+import org.apache.tika.server.core.DefaultInputStreamFactory;
+import org.apache.tika.server.core.ServerStatus;
+import org.apache.tika.server.core.TikaServerCli;
+import org.apache.tika.server.core.resource.TikaResource;
+import org.apache.tika.server.core.resource.UnpackerResource;
import org.junit.After;
import org.junit.Before;
@@ -120,7 +121,7 @@ public abstract class CXFTestBase {
}
protected InputStream getTikaConfigInputStream() {
- return getClass().getResourceAsStream("tika-config-for-server-tests.xml");
+ return getClass().getResourceAsStream("/config/tika-config-for-server-tests.xml");
}
/**
@@ -140,7 +141,7 @@ public abstract class CXFTestBase {
server.destroy();
}
- static String getStringFromInputStream(InputStream in) throws Exception {
+ protected static String getStringFromInputStream(InputStream in) throws Exception {
return IOUtils.toString(in, UTF_8);
}
diff --git a/tika-server/src/test/java/org/apache/tika/server/LanguageResourceTest.java b/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/LanguageResourceTest.java
similarity index 90%
rename from tika-server/src/test/java/org/apache/tika/server/LanguageResourceTest.java
rename to tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/LanguageResourceTest.java
index 67762c7..25ecce9 100644
--- a/tika-server/src/test/java/org/apache/tika/server/LanguageResourceTest.java
+++ b/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/LanguageResourceTest.java
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package org.apache.tika.server;
+package org.apache.tika.server.core;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotNull;
@@ -26,12 +26,13 @@ import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;
+import org.apache.cxf.helpers.IOUtils;
import org.apache.cxf.jaxrs.JAXRSServerFactoryBean;
import org.apache.cxf.jaxrs.client.WebClient;
import org.apache.cxf.jaxrs.lifecycle.SingletonResourceProvider;
-import org.apache.tika.server.resource.LanguageResource;
-import org.apache.tika.server.writer.TarWriter;
-import org.apache.tika.server.writer.ZipWriter;
+import org.apache.tika.server.core.resource.LanguageResource;
+import org.apache.tika.server.core.writer.TarWriter;
+import org.apache.tika.server.core.writer.ZipWriter;
import org.junit.Test;
public class LanguageResourceTest extends CXFTestBase {
@@ -87,7 +88,7 @@ public class LanguageResourceTest extends CXFTestBase {
String url = endPoint + LANG_STREAM_PATH;
Response response = WebClient.create(url).type("text/plain")
.accept("text/plain")
- .put(ClassLoader.getSystemResourceAsStream("english.txt"));
+ .put(getClass().getResourceAsStream("/test-documents/english.txt"));
assertNotNull(response);
String readLang = getStringFromInputStream((InputStream) response
.getEntity());
@@ -99,7 +100,7 @@ public class LanguageResourceTest extends CXFTestBase {
String url = endPoint + LANG_STREAM_PATH;
Response response = WebClient.create(url).type("text/plain")
.accept("text/plain")
- .put(ClassLoader.getSystemResourceAsStream("french.txt"));
+ .put(getClass().getResourceAsStream("/test-documents/french.txt"));
assertNotNull(response);
String readLang = getStringFromInputStream((InputStream) response
.getEntity());
diff --git a/tika-server/src/test/java/org/apache/tika/server/NullWebClientLogger.java b/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/NullWebClientLogger.java
similarity index 93%
rename from tika-server/src/test/java/org/apache/tika/server/NullWebClientLogger.java
rename to tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/NullWebClientLogger.java
index d41a4e7..9a7a9e8 100644
--- a/tika-server/src/test/java/org/apache/tika/server/NullWebClientLogger.java
+++ b/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/NullWebClientLogger.java
@@ -14,9 +14,10 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.tika.server;
+package org.apache.tika.server.core;
import org.apache.cxf.common.logging.AbstractDelegatingLogger;
+import org.apache.tika.server.core.TikaServerIntegrationTest;
import java.util.logging.Level;
import java.util.logging.LogRecord;
diff --git a/tika-server/src/test/java/org/apache/tika/server/ServerStatusTest.java b/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/ServerStatusTest.java
similarity index 96%
rename from tika-server/src/test/java/org/apache/tika/server/ServerStatusTest.java
rename to tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/ServerStatusTest.java
index 0ad8f26..66deafe 100644
--- a/tika-server/src/test/java/org/apache/tika/server/ServerStatusTest.java
+++ b/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/ServerStatusTest.java
@@ -14,8 +14,10 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.tika.server;
+package org.apache.tika.server.core;
+import org.apache.tika.server.core.ServerStatus;
+import org.apache.tika.server.core.TaskStatus;
import org.junit.Test;
import java.util.Map;
diff --git a/tika-server/src/test/java/org/apache/tika/server/StackTraceOffTest.java b/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/StackTraceOffTest.java
similarity index 82%
rename from tika-server/src/test/java/org/apache/tika/server/StackTraceOffTest.java
rename to tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/StackTraceOffTest.java
index 5ba15ac..2c418f0 100644
--- a/tika-server/src/test/java/org/apache/tika/server/StackTraceOffTest.java
+++ b/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/StackTraceOffTest.java
@@ -1,4 +1,4 @@
-package org.apache.tika.server;
+package org.apache.tika.server.core;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
@@ -31,16 +31,16 @@ import org.apache.cxf.jaxrs.JAXRSServerFactoryBean;
import org.apache.cxf.jaxrs.client.WebClient;
import org.apache.cxf.jaxrs.lifecycle.ResourceProvider;
import org.apache.cxf.jaxrs.lifecycle.SingletonResourceProvider;
-import org.apache.tika.server.resource.DetectorResource;
-import org.apache.tika.server.resource.MetadataResource;
-import org.apache.tika.server.resource.RecursiveMetadataResource;
-import org.apache.tika.server.resource.TikaResource;
-import org.apache.tika.server.resource.UnpackerResource;
-import org.apache.tika.server.writer.CSVMessageBodyWriter;
-import org.apache.tika.server.writer.JSONMessageBodyWriter;
-import org.apache.tika.server.writer.MetadataListMessageBodyWriter;
-import org.apache.tika.server.writer.TextMessageBodyWriter;
-import org.apache.tika.server.writer.XMPMessageBodyWriter;
+import org.apache.tika.server.core.resource.DetectorResource;
+import org.apache.tika.server.core.resource.MetadataResource;
+import org.apache.tika.server.core.resource.RecursiveMetadataResource;
+import org.apache.tika.server.core.resource.TikaResource;
+import org.apache.tika.server.core.resource.UnpackerResource;
+import org.apache.tika.server.core.writer.CSVMessageBodyWriter;
+import org.apache.tika.server.core.writer.JSONMessageBodyWriter;
+import org.apache.tika.server.core.writer.MetadataListMessageBodyWriter;
+import org.apache.tika.server.core.writer.TextMessageBodyWriter;
+//import org.apache.tika.server.core.writer.XMPMessageBodyWriter;
import org.junit.Assert;
import org.junit.Test;
@@ -50,8 +50,11 @@ import org.junit.Test;
* when the stack trace param is set to false.
*/
public class StackTraceOffTest extends CXFTestBase {
- public static final String TEST_NULL = "mock/null_pointer.xml";
- public static final String TEST_PASSWORD_PROTECTED = "password.xls";
+
+ private static final String TEST_HELLO_WORLD = "test-documents/mock/hello_world.xml";
+ private static final String TEST_NULL = "test-documents/mock/null_pointer.xml";
+ private static final String TEST_PASSWORD_PROTECTED = "test-documents/mock/encrypted_document_exception.xml";
+
private static final String[] PATHS = new String[]{
"/tika",
@@ -78,7 +81,6 @@ public class StackTraceOffTest extends CXFTestBase {
providers.add(new TikaServerParseExceptionMapper(false));
providers.add(new JSONMessageBodyWriter());
providers.add(new CSVMessageBodyWriter());
- providers.add(new XMPMessageBodyWriter());
providers.add(new TextMessageBodyWriter());
providers.add(new MetadataListMessageBodyWriter());
sf.setProviders(providers);
@@ -132,7 +134,7 @@ public class StackTraceOffTest extends CXFTestBase {
Response response = WebClient
.create(endPoint + path)
.accept("*:*")
- .put(ClassLoader.getSystemResourceAsStream("testDigilite.fdf"));
+ .put(getClass().getResourceAsStream("/test-documents/testDigilite.fdf"));
if (path.equals("/unpack")) {
//"NO CONTENT"
assertEquals("bad type: " + path, 204, response.getStatus());
@@ -149,10 +151,11 @@ public class StackTraceOffTest extends CXFTestBase {
//exceptions as the others...
@Test
public void testMeta() throws Exception {
- InputStream stream = ClassLoader.getSystemResourceAsStream(TikaResourceTest.TEST_DOC);
+ InputStream stream = ClassLoader.getSystemResourceAsStream(TEST_HELLO_WORLD);
- Response response = WebClient.create(endPoint + "/meta" + "/Author").type("application/msword")
- .accept(MediaType.TEXT_PLAIN).put(copy(stream, 8000));
+ Response response = WebClient.create(endPoint + "/meta" + "/Author")
+ .type("application/mock+xml")
+ .accept(MediaType.TEXT_PLAIN).put(copy(stream, 100));
Assert.assertEquals(Response.Status.BAD_REQUEST.getStatusCode(), response.getStatus());
String msg = getStringFromInputStream((InputStream) response.getEntity());
assertEquals("Failed to get metadata field Author", msg);
diff --git a/tika-server/src/test/java/org/apache/tika/server/StackTraceTest.java b/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/StackTraceTest.java
similarity index 82%
rename from tika-server/src/test/java/org/apache/tika/server/StackTraceTest.java
rename to tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/StackTraceTest.java
index de17985..90c9a6d 100644
--- a/tika-server/src/test/java/org/apache/tika/server/StackTraceTest.java
+++ b/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/StackTraceTest.java
@@ -1,5 +1,5 @@
-package org.apache.tika.server;
-/*
+package org.apache.tika.server.core;
+/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
@@ -30,22 +30,25 @@ import org.apache.cxf.jaxrs.JAXRSServerFactoryBean;
import org.apache.cxf.jaxrs.client.WebClient;
import org.apache.cxf.jaxrs.lifecycle.ResourceProvider;
import org.apache.cxf.jaxrs.lifecycle.SingletonResourceProvider;
-import org.apache.tika.server.resource.DetectorResource;
-import org.apache.tika.server.resource.MetadataResource;
-import org.apache.tika.server.resource.RecursiveMetadataResource;
-import org.apache.tika.server.resource.TikaResource;
-import org.apache.tika.server.resource.UnpackerResource;
-import org.apache.tika.server.writer.CSVMessageBodyWriter;
-import org.apache.tika.server.writer.JSONMessageBodyWriter;
-import org.apache.tika.server.writer.MetadataListMessageBodyWriter;
-import org.apache.tika.server.writer.TextMessageBodyWriter;
-import org.apache.tika.server.writer.XMPMessageBodyWriter;
+import org.apache.tika.server.core.CXFTestBase;
+import org.apache.tika.server.core.TikaServerParseExceptionMapper;
+import org.apache.tika.server.core.resource.DetectorResource;
+import org.apache.tika.server.core.resource.MetadataResource;
+import org.apache.tika.server.core.resource.RecursiveMetadataResource;
+import org.apache.tika.server.core.resource.TikaResource;
+import org.apache.tika.server.core.resource.UnpackerResource;
+import org.apache.tika.server.core.writer.CSVMessageBodyWriter;
+import org.apache.tika.server.core.writer.JSONMessageBodyWriter;
+import org.apache.tika.server.core.writer.MetadataListMessageBodyWriter;
+import org.apache.tika.server.core.writer.TextMessageBodyWriter;
import org.junit.Assert;
import org.junit.Test;
public class StackTraceTest extends CXFTestBase {
- public static final String TEST_NULL = "mock/null_pointer.xml";
- public static final String TEST_PASSWORD_PROTECTED = "password.xls";
+
+ private static final String TEST_HELLO_WORLD = "test-documents/mock/hello_world.xml";
+ private static final String TEST_NULL = "test-documents/mock/null_pointer.xml";
+ private static final String TEST_PASSWORD_PROTECTED = "test-documents/mock/encrypted_document_exception.xml";
private static final String[] PATHS = new String[]{
"/tika",
@@ -72,7 +75,7 @@ public class StackTraceTest extends CXFTestBase {
providers.add(new TikaServerParseExceptionMapper(true));
providers.add(new JSONMessageBodyWriter());
providers.add(new CSVMessageBodyWriter());
- providers.add(new XMPMessageBodyWriter());
+ //providers.add(new XMPMessageBodyWriter());
providers.add(new TextMessageBodyWriter());
providers.add(new MetadataListMessageBodyWriter());
sf.setProviders(providers);
@@ -128,7 +131,7 @@ public class StackTraceTest extends CXFTestBase {
Response response = WebClient
.create(endPoint + path)
.accept("*:*")
- .put(ClassLoader.getSystemResourceAsStream("testDigilite.fdf"));
+ .put(ClassLoader.getSystemResourceAsStream("test-documents/testDigilite.fdf"));
if (path.equals("/unpack")) {
//"NO CONTENT"
assertEquals("bad type: " + path, 204, response.getStatus());
@@ -146,10 +149,10 @@ public class StackTraceTest extends CXFTestBase {
//exceptions as the others...
@Test
public void testMeta() throws Exception {
- InputStream stream = ClassLoader.getSystemResourceAsStream(TikaResourceTest.TEST_DOC);
+ InputStream stream = ClassLoader.getSystemResourceAsStream(TEST_HELLO_WORLD);
- Response response = WebClient.create(endPoint + "/meta" + "/Author").type("application/msword")
- .accept(MediaType.TEXT_PLAIN).put(copy(stream, 8000));
+ Response response = WebClient.create(endPoint + "/meta" + "/Author").type("application/mock+xml")
+ .accept(MediaType.TEXT_PLAIN).put(copy(stream, 100));
Assert.assertEquals(Response.Status.BAD_REQUEST.getStatusCode(), response.getStatus());
String msg = getStringFromInputStream((InputStream) response.getEntity());
assertEquals("Failed to get metadata field Author", msg);
diff --git a/tika-server/src/test/java/org/apache/tika/server/TikaMimeTypesTest.java b/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/TikaMimeTypesTest.java
similarity index 65%
rename from tika-server/src/test/java/org/apache/tika/server/TikaMimeTypesTest.java
rename to tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/TikaMimeTypesTest.java
index 6b2be33..567e681 100644
--- a/tika-server/src/test/java/org/apache/tika/server/TikaMimeTypesTest.java
+++ b/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/TikaMimeTypesTest.java
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package org.apache.tika.server;
+package org.apache.tika.server.core;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
@@ -31,7 +31,8 @@ import com.google.gson.GsonBuilder;
import org.apache.cxf.jaxrs.JAXRSServerFactoryBean;
import org.apache.cxf.jaxrs.client.WebClient;
import org.apache.cxf.jaxrs.lifecycle.SingletonResourceProvider;
-import org.apache.tika.server.resource.TikaMimeTypes;
+import org.apache.tika.server.core.CXFTestBase;
+import org.apache.tika.server.core.resource.TikaMimeTypes;
import org.junit.Test;
public class TikaMimeTypesTest extends CXFTestBase {
@@ -92,38 +93,5 @@ public class TikaMimeTypesTest extends CXFTestBase {
assertContains("Alias: image/x-ms-bmp", text);
}
- @Test
- @SuppressWarnings("unchecked")
- public void testGetJSON() throws Exception {
- Response response = WebClient
- .create(endPoint + MIMETYPES_PATH)
- .type(javax.ws.rs.core.MediaType.APPLICATION_JSON)
- .accept(javax.ws.rs.core.MediaType.APPLICATION_JSON)
- .get();
-
- String jsonStr = getStringFromInputStream((InputStream) response.getEntity());
- Map<String, Map<String, Object>> json = (Map<String, Map<String, Object>>)
- GSON.fromJson(jsonStr, Map.class);
- assertEquals(true, json.containsKey("text/plain"));
- assertEquals(true, json.containsKey("application/xml"));
- assertEquals(true, json.containsKey("video/x-ogm"));
- assertEquals(true, json.containsKey("image/bmp"));
-
- Map<String, Object> bmp = json.get("image/bmp");
- assertEquals(true, bmp.containsKey("alias"));
- List<Object> aliases = (List) bmp.get("alias");
- assertEquals(2, aliases.size());
-
- assertEquals("image/x-bmp", aliases.get(0));
- assertEquals("image/x-ms-bmp", aliases.get(1));
-
- String whichParser = bmp.get("parser").toString();
- assertTrue("Which parser", whichParser.equals("org.apache.tika.parser.ocr.TesseractOCRParser") ||
- whichParser.equals("org.apache.tika.parser.image.ImageParser"));
-
- Map<String, Object> ogm = json.get("video/x-ogm");
- assertEquals("video/ogg", ogm.get("supertype"));
- assertEquals("org.gagravarr.tika.OggParser", ogm.get("parser"));
- }
}
diff --git a/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/TikaResourceTest.java b/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/TikaResourceTest.java
new file mode 100644
index 0000000..d8f93ce
--- /dev/null
+++ b/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/TikaResourceTest.java
@@ -0,0 +1,111 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.server.core;
+
+import org.apache.cxf.attachment.AttachmentUtil;
+import org.apache.cxf.jaxrs.JAXRSServerFactoryBean;
+import org.apache.cxf.jaxrs.client.WebClient;
+import org.apache.cxf.jaxrs.lifecycle.SingletonResourceProvider;
+import org.apache.tika.server.core.resource.TikaResource;
+import org.junit.Ignore;
+import org.junit.Test;
+
+import javax.ws.rs.ProcessingException;
+import javax.ws.rs.core.Response;
+import java.io.InputStream;
+import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
+import java.util.List;
+
+import static org.apache.cxf.helpers.HttpHeaderHelper.CONTENT_ENCODING;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+
+/**
+ * Tests for the {@code /tika} endpoint exposed by {@link TikaResource}.
+ * <p>
+ * {@code endPoint}, {@code getStringFromInputStream} and {@code assertContains}
+ * are not defined in this class; presumably they are inherited from
+ * {@link CXFTestBase} -- confirm there.
+ */
+public class TikaResourceTest extends CXFTestBase {
+ // Classpath-relative paths of the mock documents that are PUT to the server;
+ // both are loaded with ClassLoader.getSystemResourceAsStream below.
+ public static final String TEST_HELLO_WORLD = "test-documents/mock/hello_world.xml";
+ public static final String TEST_OOM = "test-documents/mock/fake_oom.xml";
+
+ // NOTE(review): not referenced anywhere in this class.
+ private static final String STREAM_CLOSED_FAULT = "java.io.IOException: Stream Closed";
+
+ // Path of the resource under test, appended to endPoint for every request.
+ private static final String TIKA_PATH = "/tika";
+ // HTTP 422 (Unprocessable Entity). NOTE(review): not referenced in this class.
+ private static final int UNPROCESSEABLE = 422;
+
+ /**
+ * Registers {@link TikaResource} as the only resource class, served from a
+ * single shared instance.
+ *
+ * @param sf the server factory bean being configured for this test
+ */
+ @Override
+ protected void setUpResources(JAXRSServerFactoryBean sf) {
+ sf.setResourceClasses(TikaResource.class);
+ sf.setResourceProvider(TikaResource.class,
+ new SingletonResourceProvider(new TikaResource()));
+ }
+
+ /**
+ * Registers a {@link TikaServerParseExceptionMapper} as the only provider.
+ *
+ * @param sf the server factory bean being configured for this test
+ */
+ @Override
+ protected void setUpProviders(JAXRSServerFactoryBean sf) {
+ List<Object> providers = new ArrayList<Object>();
+ // NOTE(review): the false argument presumably turns off stack traces in
+ // error responses -- confirm against TikaServerParseExceptionMapper.
+ providers.add(new TikaServerParseExceptionMapper(false));
+ sf.setProviders(providers);
+ }
+
+ /**
+ * A plain GET on {@code /tika} must return exactly {@link TikaResource#GREETING}.
+ */
+ @Test
+ public void testHelloWorld() throws Exception {
+ Response response = WebClient.create(endPoint + TIKA_PATH)
+ .type("text/plain").accept("text/plain").get();
+ assertEquals(TikaResource.GREETING,
+ getStringFromInputStream((InputStream) response.getEntity()));
+ }
+
+
+ /**
+ * Smoke test without assertions: it passes as long as the call below
+ * returns without throwing.
+ */
+ @Test
+ public void testJAXBAndActivationDependency() {
+ //TIKA-2778
+ AttachmentUtil.getCommandMap();
+ }
+
+ /**
+ * PUTs the {@link #TEST_OOM} document, ignoring any exception, and then checks
+ * that a following PUT of {@link #TEST_HELLO_WORLD} still yields a response
+ * containing "hello world".
+ */
+ @Test
+ public void testOOMInLegacyMode() throws Exception {
+
+ Response response = null;
+ try {
+ // The response of this first request is never inspected; it is
+ // overwritten by the second request below.
+ response = WebClient
+ .create(endPoint + TIKA_PATH)
+ .accept("text/plain")
+ .put(ClassLoader
+ .getSystemResourceAsStream(TEST_OOM));
+ } catch (Exception e) {
+ //oom may or may not cause an exception depending
+ //on the timing
+ }
+
+ // Second request: the assertion below only holds if the server is still
+ // answering after the first request.
+ response = WebClient
+ .create(endPoint + TIKA_PATH)
+ .accept("text/plain")
+ .put(ClassLoader
+ .getSystemResourceAsStream(TEST_HELLO_WORLD));
+ String responseMsg = getStringFromInputStream((InputStream) response.getEntity());
+
+ assertContains("hello world", responseMsg);
+ }
+ /**
+ * A GET on {@code /tika?_wadl} must return a body that starts with
+ * {@code <application}.
+ */
+ @Test
+ public void testApplicationWadl() throws Exception {
+ Response response = WebClient
+ .create(endPoint + TIKA_PATH + "?_wadl")
+ .accept("text/plain").get();
+ String resp = getStringFromInputStream((InputStream) response
+ .getEntity());
+ assertTrue(resp.startsWith("<application"));
+ }
+}
diff --git a/tika-server/src/test/java/org/apache/tika/server/TikaServerIntegrationTest.java b/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/TikaServerIntegrationTest.java
similarity index 94%
rename from tika-server/src/test/java/org/apache/tika/server/TikaServerIntegrationTest.java
rename to tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/TikaServerIntegrationTest.java
index abce3cf..59592e3 100644
--- a/tika-server/src/test/java/org/apache/tika/server/TikaServerIntegrationTest.java
+++ b/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/TikaServerIntegrationTest.java
@@ -14,7 +14,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.tika.server;
+package org.apache.tika.server.core;
import com.google.gson.JsonObject;
import com.google.gson.JsonParser;
@@ -22,7 +22,6 @@ import org.apache.cxf.common.logging.LogUtils;
import org.apache.cxf.jaxrs.client.WebClient;
import org.apache.tika.TikaTest;
import org.apache.tika.metadata.Metadata;
-import org.apache.tika.metadata.OfficeOpenXMLExtended;
import org.apache.tika.metadata.serialization.JsonMetadataList;
import org.junit.After;
import org.junit.AfterClass;
@@ -55,13 +54,13 @@ public class TikaServerIntegrationTest extends TikaTest {
private static final Logger LOG = LoggerFactory.getLogger(TikaServerIntegrationTest.class);
- private static final String TEST_RECURSIVE_DOC = "test_recursive_embedded.docx";
- private static final String TEST_OOM = "mock/fake_oom.xml";
- private static final String TEST_SYSTEM_EXIT = "mock/system_exit.xml";
- private static final String TEST_HEAVY_HANG = "mock/heavy_hang_30000.xml";
- private static final String TEST_HEAVY_HANG_SHORT = "mock/heavy_hang_100.xml";
- private static final String TEST_STDOUT_STDERR = "mock/testStdOutErr.xml";
- private static final String TEST_STATIC_STDOUT_STDERR = "mock/testStaticStdOutErr.xml";
+ private static final String TEST_HELLO_WORLD = "test-documents/mock/hello_world.xml";
+ private static final String TEST_OOM = "test-documents/mock/fake_oom.xml";
+ private static final String TEST_SYSTEM_EXIT = "test-documents/mock/system_exit.xml";
+ private static final String TEST_HEAVY_HANG = "test-documents/mock/heavy_hang_30000.xml";
+ private static final String TEST_HEAVY_HANG_SHORT = "test-documents/mock/heavy_hang_100.xml";
+ private static final String TEST_STDOUT_STDERR = "test-documents/mock/testStdOutErr.xml";
+ private static final String TEST_STATIC_STDOUT_STDERR = "test-documents/mock/testStaticStdOutErr.xml";
private static final String META_PATH = "/rmeta";
private static final String STATUS_PATH = "/status";
@@ -585,7 +584,7 @@ public class TikaServerIntegrationTest extends TikaTest {
System.out.println("FILE # "+i);
boolean ex = false;
Response response = null;
- String file = TEST_RECURSIVE_DOC;
+ String file = TEST_HELLO_WORLD;
try {
if (r.nextFloat() < 0.01) {
file = TEST_SYSTEM_EXIT;
@@ -611,12 +610,12 @@ public class TikaServerIntegrationTest extends TikaTest {
System.out.println("done awaiting");
continue;
}
- if (file.equals(TEST_RECURSIVE_DOC)) {
+ if (file.equals(TEST_HELLO_WORLD)) {
Reader reader = new InputStreamReader((InputStream) response.getEntity(), UTF_8);
List<Metadata> metadataList = JsonMetadataList.fromJson(reader);
- assertEquals(12, metadataList.size());
- assertEquals("Microsoft Office Word", metadataList.get(0).get(OfficeOpenXMLExtended.APPLICATION));
- assertContains("plundered our seas", metadataList.get(6).get("X-TIKA:content"));
+ assertEquals(1, metadataList.size());
+ assertEquals("Nikolai Lobachevsky", metadataList.get(0).get("author"));
+ assertContains("hello world", metadataList.get(0).get("X-TIKA:content"));
}
//assertEquals("a38e6c7b38541af87148dee9634cb811", metadataList.get(10).get("X-TIKA:digest:MD5"));
}
@@ -630,11 +629,11 @@ public class TikaServerIntegrationTest extends TikaTest {
.create(endPoint + META_PATH)
.accept("application/json")
.put(ClassLoader
- .getSystemResourceAsStream(TEST_RECURSIVE_DOC));
+ .getSystemResourceAsStream(TEST_HELLO_WORLD));
Reader reader = new InputStreamReader((InputStream) response.getEntity(), UTF_8);
List<Metadata> metadataList = JsonMetadataList.fromJson(reader);
- assertEquals(12, metadataList.size());
- assertEquals("Microsoft Office Word", metadataList.get(0).get(OfficeOpenXMLExtended.APPLICATION));
- assertContains("plundered our seas", metadataList.get(6).get("X-TIKA:content"));
+ assertEquals(1, metadataList.size());
+ assertEquals("Nikolai Lobachevsky", metadataList.get(0).get("author"));
+ assertContains("hello world", metadataList.get(0).get("X-TIKA:content"));
}
}
diff --git a/tika-server/src/test/java/org/apache/tika/server/TikaServerStatusTest.java b/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/TikaServerStatusTest.java
similarity index 88%
rename from tika-server/src/test/java/org/apache/tika/server/TikaServerStatusTest.java
rename to tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/TikaServerStatusTest.java
index eb3f2d0..556ac01 100644
--- a/tika-server/src/test/java/org/apache/tika/server/TikaServerStatusTest.java
+++ b/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/TikaServerStatusTest.java
@@ -14,19 +14,17 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.tika.server;
+package org.apache.tika.server.core;
import com.google.gson.JsonObject;
import com.google.gson.JsonParser;
import org.apache.cxf.jaxrs.JAXRSServerFactoryBean;
import org.apache.cxf.jaxrs.client.WebClient;
import org.apache.cxf.jaxrs.lifecycle.SingletonResourceProvider;
-import org.apache.tika.server.resource.RecursiveMetadataResource;
-import org.apache.tika.server.resource.TikaResource;
-import org.apache.tika.server.resource.TikaServerStatus;
-import org.apache.tika.server.writer.JSONMessageBodyWriter;
-import org.apache.tika.server.writer.JSONObjWriter;
-import org.apache.tika.server.writer.MetadataListMessageBodyWriter;
+import org.apache.tika.server.core.CXFTestBase;
+import org.apache.tika.server.core.ServerStatus;
+import org.apache.tika.server.core.resource.TikaServerStatus;
+import org.apache.tika.server.core.writer.JSONObjWriter;
import org.junit.Test;
import javax.ws.rs.core.Response;
diff --git a/tika-server/src/test/java/org/apache/tika/server/TikaVersionTest.java b/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/TikaVersionTest.java
similarity index 93%
rename from tika-server/src/test/java/org/apache/tika/server/TikaVersionTest.java
rename to tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/TikaVersionTest.java
index 7104244..5b96b4b 100644
--- a/tika-server/src/test/java/org/apache/tika/server/TikaVersionTest.java
+++ b/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/TikaVersionTest.java
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package org.apache.tika.server;
+package org.apache.tika.server.core;
import static org.junit.Assert.assertEquals;
@@ -27,7 +27,8 @@ import org.apache.cxf.jaxrs.JAXRSServerFactoryBean;
import org.apache.cxf.jaxrs.client.WebClient;
import org.apache.cxf.jaxrs.lifecycle.SingletonResourceProvider;
import org.apache.tika.Tika;
-import org.apache.tika.server.resource.TikaVersion;
+import org.apache.tika.server.core.CXFTestBase;
+import org.apache.tika.server.core.resource.TikaVersion;
import org.junit.Test;
public class TikaVersionTest extends CXFTestBase {
diff --git a/tika-server/src/test/java/org/apache/tika/server/TikaWelcomeTest.java b/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/TikaWelcomeTest.java
similarity index 90%
rename from tika-server/src/test/java/org/apache/tika/server/TikaWelcomeTest.java
rename to tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/TikaWelcomeTest.java
index f0f3dac..c7763ef 100644
--- a/tika-server/src/test/java/org/apache/tika/server/TikaWelcomeTest.java
+++ b/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/TikaWelcomeTest.java
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package org.apache.tika.server;
+package org.apache.tika.server.core;
import javax.ws.rs.core.Response;
@@ -28,10 +28,13 @@ import org.apache.cxf.jaxrs.client.WebClient;
import org.apache.cxf.jaxrs.lifecycle.ResourceProvider;
import org.apache.cxf.jaxrs.lifecycle.SingletonResourceProvider;
import org.apache.tika.Tika;
-import org.apache.tika.server.resource.DetectorResource;
-import org.apache.tika.server.resource.MetadataResource;
-import org.apache.tika.server.resource.TikaVersion;
-import org.apache.tika.server.resource.TikaWelcome;
+import org.apache.tika.server.core.CXFTestBase;
+import org.apache.tika.server.core.ServerStatus;
+import org.apache.tika.server.core.TikaVersionTest;
+import org.apache.tika.server.core.resource.DetectorResource;
+import org.apache.tika.server.core.resource.MetadataResource;
+import org.apache.tika.server.core.resource.TikaVersion;
+import org.apache.tika.server.core.resource.TikaWelcome;
import org.junit.Test;
public class TikaWelcomeTest extends CXFTestBase {
diff --git a/tika-server/src/test/java/org/apache/tika/server/TranslateResourceTest.java b/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/TranslateResourceTest.java
similarity index 88%
rename from tika-server/src/test/java/org/apache/tika/server/TranslateResourceTest.java
rename to tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/TranslateResourceTest.java
index 8361d7a..85a2f13 100644
--- a/tika-server/src/test/java/org/apache/tika/server/TranslateResourceTest.java
+++ b/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/TranslateResourceTest.java
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package org.apache.tika.server;
+package org.apache.tika.server.core;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotNull;
@@ -29,9 +29,12 @@ import java.util.List;
import org.apache.cxf.jaxrs.JAXRSServerFactoryBean;
import org.apache.cxf.jaxrs.client.WebClient;
import org.apache.cxf.jaxrs.lifecycle.SingletonResourceProvider;
-import org.apache.tika.server.resource.TranslateResource;
-import org.apache.tika.server.writer.TarWriter;
-import org.apache.tika.server.writer.ZipWriter;
+import org.apache.tika.server.core.CXFTestBase;
+import org.apache.tika.server.core.ServerStatus;
+import org.apache.tika.server.core.TikaServerParseExceptionMapper;
+import org.apache.tika.server.core.resource.TranslateResource;
+import org.apache.tika.server.core.writer.TarWriter;
+import org.apache.tika.server.core.writer.ZipWriter;
import org.junit.Test;
public class TranslateResourceTest extends CXFTestBase {
diff --git a/tika-server/src/test/resources/org/apache/tika/server/tika-config-for-server-tests.xml b/tika-server/tika-server-core/src/test/resources/config/tika-config-for-server-tests.xml
similarity index 100%
rename from tika-server/src/test/resources/org/apache/tika/server/tika-config-for-server-tests.xml
rename to tika-server/tika-server-core/src/test/resources/config/tika-config-for-server-tests.xml
diff --git a/tika-server/src/test/resources/log4j.properties b/tika-server/tika-server-core/src/test/resources/log4j.properties
similarity index 97%
rename from tika-server/src/test/resources/log4j.properties
rename to tika-server/tika-server-core/src/test/resources/log4j.properties
index 6aa860a..c3676a2 100644
--- a/tika-server/src/test/resources/log4j.properties
+++ b/tika-server/tika-server-core/src/test/resources/log4j.properties
@@ -14,7 +14,7 @@
# limitations under the License.
#info,debug, error,fatal ...
-log4j.rootLogger=info,stderr
+log4j.rootLogger=warn,stderr
#console
log4j.appender.stderr=org.apache.log4j.ConsoleAppender
diff --git a/tika-server/src/test/resources/logging/log4j_child.xml b/tika-server/tika-server-core/src/test/resources/logging/log4j_child.xml
similarity index 96%
rename from tika-server/src/test/resources/logging/log4j_child.xml
rename to tika-server/tika-server-core/src/test/resources/logging/log4j_child.xml
index 01c4a7b..42b5009 100644
--- a/tika-server/src/test/resources/logging/log4j_child.xml
+++ b/tika-server/tika-server-core/src/test/resources/logging/log4j_child.xml
@@ -34,11 +34,11 @@
</layout>
</appender>
<logger name="org.apache" additivity="true">
- <level value="info"/>
+ <level value="warn"/>
<appender-ref ref="stdout"/>
</logger>
<logger name="org.apache.cxf" additivity="true">
- <level value="info"/>
+ <level value="warn"/>
<appender-ref ref="stderr"/>
</logger>
diff --git a/tika-server/src/test/resources/english.txt b/tika-server/tika-server-core/src/test/resources/test-documents/english.txt
similarity index 100%
rename from tika-server/src/test/resources/english.txt
rename to tika-server/tika-server-core/src/test/resources/test-documents/english.txt
diff --git a/tika-server/src/test/resources/french.txt b/tika-server/tika-server-core/src/test/resources/test-documents/french.txt
similarity index 100%
rename from tika-server/src/test/resources/french.txt
rename to tika-server/tika-server-core/src/test/resources/test-documents/french.txt
diff --git a/tika-server/src/test/resources/testDigilite.fdf b/tika-server/tika-server-core/src/test/resources/test-documents/testDigilite.fdf
similarity index 100%
rename from tika-server/src/test/resources/testDigilite.fdf
rename to tika-server/tika-server-core/src/test/resources/test-documents/testDigilite.fdf