You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2016/09/23 13:49:37 UTC
tika git commit: TIKA-2081 -- add fileUrl back into tika-server
Repository: tika
Updated Branches:
refs/heads/master 3a5431e20 -> d612aea85
TIKA-2081 -- add fileUrl back into tika-server
Project: http://git-wip-us.apache.org/repos/asf/tika/repo
Commit: http://git-wip-us.apache.org/repos/asf/tika/commit/d612aea8
Tree: http://git-wip-us.apache.org/repos/asf/tika/tree/d612aea8
Diff: http://git-wip-us.apache.org/repos/asf/tika/diff/d612aea8
Branch: refs/heads/master
Commit: d612aea850060c7d77124f79c525f68032a11031
Parents: 3a5431e
Author: tballison <ta...@mitre.org>
Authored: Fri Sep 23 09:49:28 2016 -0400
Committer: tballison <ta...@mitre.org>
Committed: Fri Sep 23 09:49:28 2016 -0400
----------------------------------------------------------------------
.../tika/server/DefaultInputStreamFactory.java | 33 +++++++++++++
.../apache/tika/server/InputStreamFactory.java | 34 +++++++++++++
.../org/apache/tika/server/TikaServerCli.java | 25 +++++++++-
.../server/URLEnabledInputStreamFactory.java | 52 ++++++++++++++++++++
.../tika/server/resource/DetectorResource.java | 2 +-
.../tika/server/resource/MetadataResource.java | 9 ++--
.../resource/RecursiveMetadataResource.java | 6 +--
.../tika/server/resource/TikaResource.java | 20 ++++++--
.../tika/server/resource/UnpackerResource.java | 8 +--
.../org/apache/tika/server/CXFTestBase.java | 4 +-
10 files changed, 174 insertions(+), 19 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/tika/blob/d612aea8/tika-server/src/main/java/org/apache/tika/server/DefaultInputStreamFactory.java
----------------------------------------------------------------------
diff --git a/tika-server/src/main/java/org/apache/tika/server/DefaultInputStreamFactory.java b/tika-server/src/main/java/org/apache/tika/server/DefaultInputStreamFactory.java
new file mode 100644
index 0000000..a2df856
--- /dev/null
+++ b/tika-server/src/main/java/org/apache/tika/server/DefaultInputStreamFactory.java
@@ -0,0 +1,33 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.server;
+
+import javax.ws.rs.core.HttpHeaders;
+import java.io.IOException;
+import java.io.InputStream;
+
+/**
+ * Passthrough -- returns InputStream as is
+ * <p>
+ * {@link #getInputSteam(InputStream, HttpHeaders)} hands back the very stream it
+ * was given and never reads the supplied headers, so a "fileUrl" request header
+ * has no effect while this factory is in use.
+ * <p>
+ * TikaServerCli installs this factory unless both the enable-fileUrl and the
+ * enable-unsecure-features options are present on the command line.
+ */
+public class DefaultInputStreamFactory implements InputStreamFactory {
+
+ @Override
+ public InputStream getInputSteam(InputStream is, HttpHeaders httpHeaders) throws IOException {
+ return is; // httpHeaders is intentionally unused
+ }
+}
http://git-wip-us.apache.org/repos/asf/tika/blob/d612aea8/tika-server/src/main/java/org/apache/tika/server/InputStreamFactory.java
----------------------------------------------------------------------
diff --git a/tika-server/src/main/java/org/apache/tika/server/InputStreamFactory.java b/tika-server/src/main/java/org/apache/tika/server/InputStreamFactory.java
new file mode 100644
index 0000000..27e7f86
--- /dev/null
+++ b/tika-server/src/main/java/org/apache/tika/server/InputStreamFactory.java
@@ -0,0 +1,34 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.server;
+
+import javax.ws.rs.core.HttpHeaders;
+import java.io.IOException;
+import java.io.InputStream;
+
+/**
+ * Interface to allow for custom/consistent creation of InputStream
+ * <p>
+ * An implementation receives the stream handed to a server resource method
+ * together with the request's HttpHeaders and returns the stream that should
+ * actually be processed. That may be the same stream or a different one.
+ * An IOException thrown here is caught by TikaResource.getInputStream and
+ * rethrown wrapped in a TikaServerParseException.
+ * <p>
+ * This factory is used statically in TikaResource. Make sure not
+ * to hold instance state in implementations.
+ * <p>
+ * Note: the method name is spelled "getInputSteam" (without the 'r');
+ * implementations and callers must use that exact spelling.
+ */
+public interface InputStreamFactory {
+
+ public InputStream getInputSteam(InputStream is, HttpHeaders httpHeaders) throws IOException;
+
+}
http://git-wip-us.apache.org/repos/asf/tika/blob/d612aea8/tika-server/src/main/java/org/apache/tika/server/TikaServerCli.java
----------------------------------------------------------------------
diff --git a/tika-server/src/main/java/org/apache/tika/server/TikaServerCli.java b/tika-server/src/main/java/org/apache/tika/server/TikaServerCli.java
index 4804398..831a6d3 100644
--- a/tika-server/src/main/java/org/apache/tika/server/TikaServerCli.java
+++ b/tika-server/src/main/java/org/apache/tika/server/TikaServerCli.java
@@ -68,6 +68,13 @@ public class TikaServerCli {
new HashSet<String>(Arrays.asList("debug", "info"));
private static final Log logger = LogFactory.getLog(TikaServerCli.class);
+ private static final String FILE_URL_WARNING = // printed to System.out when the URL-enabled factory is selected
+ "WARNING: You have chosen to run tika-server with fileUrl enabled.\n"+
+ "Whoever has access to your service now has the same read permissions\n"+
+ "as tika-server. Users could request and receive a sensitive file from your\n" +
+ "drive or a webpage from your intranet. See CVE-2015-3271.\n"+
+ "Please make sure you know what you are doing.";
+
private static Options getOptions() {
Options options = new Options();
options.addOption("C", "cors", true, "origin allowed to make CORS requests (default=NONE)\nall allowed if \"all\"");
@@ -79,6 +86,8 @@ public class TikaServerCli {
options.addOption("l", "log", true, "request URI log level ('debug' or 'info')");
options.addOption("s", "includeStack", false, "whether or not to return a stack trace\nif there is an exception during 'parse'");
options.addOption("?", "help", false, "this help message");
+ options.addOption("enable-unsecure-features", false, "this is required to enable fileUrl.");
+ options.addOption("enable-fileUrl", false, "allows user to pass in fileUrl instead of InputStream.");
return options;
}
@@ -166,8 +175,22 @@ public class TikaServerCli {
CommonsDigester.parse(line.getOptionValue("digest")));
}
+ if (line.hasOption("enable-fileUrl") &&
+ !line.hasOption("enable-unsecure-features")) {
+ System.err.println("If you want to enable fileUrl, you must also acknowledge the security risks\n"+
+ "by including --enable-unsecure-features. See CVE-2015-3271.");
+ System.exit(-1);
+ }
+ InputStreamFactory inputStreamFactory = null;
+ if (line.hasOption("enable-fileUrl") &&
+ line.hasOption("enable-unsecure-features")) {
+ inputStreamFactory = new URLEnabledInputStreamFactory();
+ System.out.println(FILE_URL_WARNING);
+ } else {
+ inputStreamFactory = new DefaultInputStreamFactory();
+ }
- TikaResource.init(tika, digester);
+ TikaResource.init(tika, digester, inputStreamFactory);
JAXRSServerFactoryBean sf = new JAXRSServerFactoryBean();
List<ResourceProvider> rCoreProviders = new ArrayList<ResourceProvider>();
http://git-wip-us.apache.org/repos/asf/tika/blob/d612aea8/tika-server/src/main/java/org/apache/tika/server/URLEnabledInputStreamFactory.java
----------------------------------------------------------------------
diff --git a/tika-server/src/main/java/org/apache/tika/server/URLEnabledInputStreamFactory.java b/tika-server/src/main/java/org/apache/tika/server/URLEnabledInputStreamFactory.java
new file mode 100644
index 0000000..10d4180
--- /dev/null
+++ b/tika-server/src/main/java/org/apache/tika/server/URLEnabledInputStreamFactory.java
@@ -0,0 +1,52 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.server;
+
+import javax.ws.rs.core.HttpHeaders;
+import java.io.IOException;
+import java.io.InputStream;
+import java.net.URL;
+
+import org.apache.tika.io.TikaInputStream;
+
+/**
+ * This class looks for "fileUrl" in the HTTP request headers. If it is not null
+ * and not empty, this will return a new TikaInputStream from the URL.
+ * Otherwise the stream that was passed in is returned unchanged.
+ * <p>
+ * When a "fileUrl" header is supplied, the stream that was passed in is not
+ * read, wrapped or closed by this class; only the URL's content is returned.
+ * A header value that is not a valid URL makes the {@link URL} constructor
+ * throw, and that exception propagates to the caller as an IOException.
+ * <p>
+ * This is not meant to be used in place of a robust, responsible crawler. Rather, this
+ * is a convenience factory.
+ * <p>
+ * <em>WARNING:</em> Unless you carefully lock down access to the server,
+ * whoever has access to this service will have the read access of the server.
+ * In short, anyone with access to this service could request and get
+ * "file:///etc/supersensitive_file_dont_read.txt". Or, if your server has access
+ * to your intranet, and you let the public hit this service, they will now
+ * have access to your intranet.
+ * See <a href="https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2015-3271">CVE-2015-3271</a>
+ * <p>
+ * TikaServerCli only installs this factory when both the enable-fileUrl and
+ * the enable-unsecure-features options are given.
+ *
+ */
+public class URLEnabledInputStreamFactory implements InputStreamFactory {
+
+ @Override
+ public InputStream getInputSteam(InputStream is, HttpHeaders httpHeaders) throws IOException {
+ String fileUrl = httpHeaders.getHeaderString("fileUrl");
+ if(fileUrl != null && !"".equals(fileUrl)){
+ return TikaInputStream.get(new URL(fileUrl)); // any URL accepted by java.net.URL is passed through; no scheme check is made here
+ }
+ return is; // no usable header: fall back to the stream that was passed in
+ }
+}
http://git-wip-us.apache.org/repos/asf/tika/blob/d612aea8/tika-server/src/main/java/org/apache/tika/server/resource/DetectorResource.java
----------------------------------------------------------------------
diff --git a/tika-server/src/main/java/org/apache/tika/server/resource/DetectorResource.java b/tika-server/src/main/java/org/apache/tika/server/resource/DetectorResource.java
index f1f5a29..9f19ad6 100644
--- a/tika-server/src/main/java/org/apache/tika/server/resource/DetectorResource.java
+++ b/tika-server/src/main/java/org/apache/tika/server/resource/DetectorResource.java
@@ -46,7 +46,7 @@ public class DetectorResource {
public String detect(final InputStream is,
@Context HttpHeaders httpHeaders, @Context final UriInfo info) {
Metadata met = new Metadata();
- TikaInputStream tis = TikaInputStream.get(is);
+ TikaInputStream tis = TikaInputStream.get(TikaResource.getInputStream(is, httpHeaders));
String filename = TikaResource.detectFilename(httpHeaders
.getRequestHeaders());
logger.info("Detecting media type for Filename: " + filename);
http://git-wip-us.apache.org/repos/asf/tika/blob/d612aea8/tika-server/src/main/java/org/apache/tika/server/resource/MetadataResource.java
----------------------------------------------------------------------
diff --git a/tika-server/src/main/java/org/apache/tika/server/resource/MetadataResource.java b/tika-server/src/main/java/org/apache/tika/server/resource/MetadataResource.java
index 89d35e8..e5e5a1f 100644
--- a/tika-server/src/main/java/org/apache/tika/server/resource/MetadataResource.java
+++ b/tika-server/src/main/java/org/apache/tika/server/resource/MetadataResource.java
@@ -17,9 +17,6 @@
package org.apache.tika.server.resource;
-import java.io.IOException;
-import java.io.InputStream;
-
import javax.ws.rs.Consumes;
import javax.ws.rs.POST;
import javax.ws.rs.PUT;
@@ -31,6 +28,8 @@ import javax.ws.rs.core.HttpHeaders;
import javax.ws.rs.core.MultivaluedMap;
import javax.ws.rs.core.Response;
import javax.ws.rs.core.UriInfo;
+import java.io.IOException;
+import java.io.InputStream;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
@@ -58,7 +57,7 @@ public class MetadataResource {
@Produces({"text/csv", "application/json", "application/rdf+xml"})
public Response getMetadata(InputStream is, @Context HttpHeaders httpHeaders, @Context UriInfo info) throws Exception {
return Response.ok(
- parseMetadata(is, httpHeaders.getRequestHeaders(), info)).build();
+ parseMetadata(TikaResource.getInputStream(is, httpHeaders), httpHeaders.getRequestHeaders(), info)).build();
}
/**
@@ -94,7 +93,7 @@ public class MetadataResource {
Response.Status defaultErrorResponse = Response.Status.BAD_REQUEST;
Metadata metadata = null;
try {
- metadata = parseMetadata(is, httpHeaders.getRequestHeaders(), info);
+ metadata = parseMetadata(TikaResource.getInputStream(is, httpHeaders), httpHeaders.getRequestHeaders(), info);
// once we've parsed the document successfully, we should use NOT_FOUND
// if we did not see the field
defaultErrorResponse = Response.Status.NOT_FOUND;
http://git-wip-us.apache.org/repos/asf/tika/blob/d612aea8/tika-server/src/main/java/org/apache/tika/server/resource/RecursiveMetadataResource.java
----------------------------------------------------------------------
diff --git a/tika-server/src/main/java/org/apache/tika/server/resource/RecursiveMetadataResource.java b/tika-server/src/main/java/org/apache/tika/server/resource/RecursiveMetadataResource.java
index aa4e0ab..b967f8b 100644
--- a/tika-server/src/main/java/org/apache/tika/server/resource/RecursiveMetadataResource.java
+++ b/tika-server/src/main/java/org/apache/tika/server/resource/RecursiveMetadataResource.java
@@ -17,8 +17,6 @@
package org.apache.tika.server.resource;
-import java.io.InputStream;
-
import javax.ws.rs.Consumes;
import javax.ws.rs.POST;
import javax.ws.rs.PUT;
@@ -30,6 +28,7 @@ import javax.ws.rs.core.HttpHeaders;
import javax.ws.rs.core.MultivaluedMap;
import javax.ws.rs.core.Response;
import javax.ws.rs.core.UriInfo;
+import java.io.InputStream;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
@@ -118,7 +117,8 @@ public class RecursiveMetadataResource {
@PathParam(HANDLER_TYPE_PARAM) String handlerTypeName
) throws Exception {
return Response.ok(
- parseMetadata(is, httpHeaders.getRequestHeaders(), info, handlerTypeName)).build();
+ parseMetadata(TikaResource.getInputStream(is, httpHeaders),
+ httpHeaders.getRequestHeaders(), info, handlerTypeName)).build();
}
private MetadataList parseMetadata(InputStream is,
http://git-wip-us.apache.org/repos/asf/tika/blob/d612aea8/tika-server/src/main/java/org/apache/tika/server/resource/TikaResource.java
----------------------------------------------------------------------
diff --git a/tika-server/src/main/java/org/apache/tika/server/resource/TikaResource.java b/tika-server/src/main/java/org/apache/tika/server/resource/TikaResource.java
index 566203a..c5150a1 100644
--- a/tika-server/src/main/java/org/apache/tika/server/resource/TikaResource.java
+++ b/tika-server/src/main/java/org/apache/tika/server/resource/TikaResource.java
@@ -73,6 +73,7 @@ import org.apache.tika.parser.pdf.PDFParserConfig;
import org.apache.tika.sax.BodyContentHandler;
import org.apache.tika.sax.ExpandedTitleContentHandler;
import org.apache.tika.sax.RichTextContentHandler;
+import org.apache.tika.server.InputStreamFactory;
import org.apache.tika.server.TikaServerParseException;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
@@ -88,10 +89,13 @@ public class TikaResource {
private static TikaConfig tikaConfig;
private static DigestingParser.Digester digester = null;
+ private static InputStreamFactory inputStreamFactory = null;
- public static void init(TikaConfig config, DigestingParser.Digester digestr) {
+ public static void init(TikaConfig config, DigestingParser.Digester digestr,
+ InputStreamFactory iSF) {
tikaConfig = config;
digester = digestr;
+ inputStreamFactory = iSF;
}
static {
@@ -172,6 +176,14 @@ public class TikaResource {
}
}
+ public static InputStream getInputStream(InputStream is, HttpHeaders headers) { // picks the stream a resource should process, via the factory given to init(...)
+ try {
+ return inputStreamFactory.getInputSteam(is, headers); // NOTE(review): inputStreamFactory stays null until init(...) runs; "getInputSteam" is the interface's actual spelling
+ } catch (IOException e) {
+ throw new TikaServerParseException(e); // no throws clause here, so factory I/O failures reach callers wrapped in this exception
+ }
+ }
+
/**
* Utility method to set a property on a class via reflection.
*
@@ -337,7 +349,7 @@ public class TikaResource {
@Consumes("*/*")
@Produces("text/plain")
public StreamingOutput getText(final InputStream is, @Context HttpHeaders httpHeaders, @Context final UriInfo info) {
- return produceText(is, httpHeaders.getRequestHeaders(), info);
+ return produceText(getInputStream(is, httpHeaders), httpHeaders.getRequestHeaders(), info);
}
public StreamingOutput produceText(final InputStream is, MultivaluedMap<String, String> httpHeaders, final UriInfo info) {
@@ -375,7 +387,7 @@ public class TikaResource {
@Consumes("*/*")
@Produces("text/html")
public StreamingOutput getHTML(final InputStream is, @Context HttpHeaders httpHeaders, @Context final UriInfo info) {
- return produceOutput(is, httpHeaders.getRequestHeaders(), info, "html");
+ return produceOutput(getInputStream(is, httpHeaders), httpHeaders.getRequestHeaders(), info, "html");
}
@POST
@@ -390,7 +402,7 @@ public class TikaResource {
@Consumes("*/*")
@Produces("text/xml")
public StreamingOutput getXML(final InputStream is, @Context HttpHeaders httpHeaders, @Context final UriInfo info) {
- return produceOutput(is, httpHeaders.getRequestHeaders(), info, "xml");
+ return produceOutput(getInputStream(is, httpHeaders), httpHeaders.getRequestHeaders(), info, "xml");
}
private StreamingOutput produceOutput(final InputStream is, final MultivaluedMap<String, String> httpHeaders,
http://git-wip-us.apache.org/repos/asf/tika/blob/d612aea8/tika-server/src/main/java/org/apache/tika/server/resource/UnpackerResource.java
----------------------------------------------------------------------
diff --git a/tika-server/src/main/java/org/apache/tika/server/resource/UnpackerResource.java b/tika-server/src/main/java/org/apache/tika/server/resource/UnpackerResource.java
index 8ee516e..383af98 100644
--- a/tika-server/src/main/java/org/apache/tika/server/resource/UnpackerResource.java
+++ b/tika-server/src/main/java/org/apache/tika/server/resource/UnpackerResource.java
@@ -17,6 +17,8 @@
package org.apache.tika.server.resource;
+import static java.nio.charset.StandardCharsets.UTF_8;
+
import javax.ws.rs.PUT;
import javax.ws.rs.Path;
import javax.ws.rs.Produces;
@@ -63,8 +65,6 @@ import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;
-import static java.nio.charset.StandardCharsets.UTF_8;
-
@Path("/unpack")
public class UnpackerResource {
public static final String TEXT_FILENAME = "__TEXT__";
@@ -93,7 +93,7 @@ public class UnpackerResource {
@Context HttpHeaders httpHeaders,
@Context UriInfo info
) throws Exception {
- return process(is, httpHeaders, info, false);
+ return process(TikaResource.getInputStream(is, httpHeaders), httpHeaders, info, false);
}
@Path("/all{id:(/.*)?}")
@@ -104,7 +104,7 @@ public class UnpackerResource {
@Context HttpHeaders httpHeaders,
@Context UriInfo info
) throws Exception {
- return process(is, httpHeaders, info, true);
+ return process(TikaResource.getInputStream(is, httpHeaders), httpHeaders, info, true);
}
private Map<String, byte[]> process(
http://git-wip-us.apache.org/repos/asf/tika/blob/d612aea8/tika-server/src/test/java/org/apache/tika/server/CXFTestBase.java
----------------------------------------------------------------------
diff --git a/tika-server/src/test/java/org/apache/tika/server/CXFTestBase.java b/tika-server/src/test/java/org/apache/tika/server/CXFTestBase.java
index 770b678..2a09968 100644
--- a/tika-server/src/test/java/org/apache/tika/server/CXFTestBase.java
+++ b/tika-server/src/test/java/org/apache/tika/server/CXFTestBase.java
@@ -82,7 +82,9 @@ public abstract class CXFTestBase {
@Before
public void setUp() {
this.tika = TikaConfig.getDefaultConfig();
- TikaResource.init(tika, new CommonsDigester(DIGESTER_READ_LIMIT, CommonsDigester.DigestAlgorithm.MD5));
+ TikaResource.init(tika,
+ new CommonsDigester(DIGESTER_READ_LIMIT, CommonsDigester.DigestAlgorithm.MD5),
+ new DefaultInputStreamFactory());
JAXRSServerFactoryBean sf = new JAXRSServerFactoryBean();
setUpResources(sf);
setUpProviders(sf);